HEAD
df_us_prev <- read_csv('US_prevalence.csv')
df_us_prev <- df_us_prev %>%
select(fips, date, rate) %>%
mutate(date = as.Date(date, "%d%b%Y")) %>%
dplyr::rename(county_fips = fips,
rate_day = rate) %>%
mutate(county_fips = as.character(county_fips)) %>%
filter(rate_day >= 0 & rate_day <= 1000) %>%
filter(date <= '2020-04-15')
df_us_prev %>% write_csv('us_prev_fips.csv')
df_us_death <- read_csv('US_prevalence.csv')
df_us_death <-df_us_death %>%
mutate(date = as.Date(date, "%d%b%Y"),
fips = as.character(fips))
df_us_death_0930 <- df_us_death %>%
filter(date == '2020-09-30') %>%
dplyr::rename(county_fips = fips,
case_rate_0930 = rate,
death_rate_0930 = rate_death) %>%
select(county_fips, case_rate_0930, death_rate_0930)
df_us_death_0415 <- df_us_death %>%
filter(date == '2020-04-15') %>%
dplyr::rename(county_fips = fips,
case_rate_0415 = rate,
death_rate_0415 = rate_death) %>%
select(county_fips, case_rate_0415, death_rate_0415)
df_us_death <- merge(df_us_death_0415, df_us_death_0930)
# save df
df_us_death %>% write_csv('us_death_county_maps.csv')
df_us_pers <- read_csv('US_personality_2.csv')
df_us_pers %>% filter(freq >= 50) %>% .$freq %>% sd()
## [1] 3175.839
df_us_pers <- df_us_pers %>%
dplyr::rename(county_fips = county,
pers_o = open,
pers_c = sci,
pers_e = extra,
pers_a = agree,
pers_n = neuro) %>%
filter(freq >= 50) %>%
select(-county_name, -freq) %>%
mutate(county_fips = as.character(county_fips))
df_us_pers %>% .$freq %>% mean
## [1] NA
df_us_ctrl <- read.csv('US_controls_2.csv')
df_us_ctrl <- df_us_ctrl %>% select(-county_name) %>%
rename(county_fips = county,
airport_dist = airport_distance,
conservative = republican,
age = medage,
healthcare = physician_pc) %>%
mutate(county_fips = as.character(county_fips),
male=male*100,
tourism=tourism*100,
manufact=manufact*100,
academics=academics*100)
df_us_temp <- read_csv("US_controls_weather.csv") %>%
filter(month %in% c('Mar')) %>%
group_by(fips) %>% summarise(temp = mean(tempc)) %>%
rename(county_fips = fips) %>%
mutate(county_fips = as.character(as.numeric(county_fips)))
df_us_ctrl <- df_us_ctrl %>% plyr::join(df_us_temp, 'county_fips')
# create sequence of dates
date_sequence <- seq.Date(min(df_us_prev$date),
max(df_us_prev$date), 1)
# create data frame with time sequence
df_dates = data.frame(date_sequence, 1:length(date_sequence))
names(df_dates) <- c('date', 'time')
# join data frames
df_us_prev <- df_us_prev %>%
plyr::join(df_us_ctrl, by='county_fips') %>%
plyr::join(df_us_pers, by='county_fips') %>%
merge(df_dates, by='date') %>%
arrange(county_fips, date) %>%
drop_na()
df_us_prev %>% select(-date, -rate_day, -time) %>%
distinct() %>% write_csv('df_us_pers_fips.csv')
df_us_prev %>% select(county_fips) %>% distinct() %>% nrow()
## [1] 2486
# join data frames
df_us_death <- df_us_death %>%
plyr::join(df_us_ctrl, by='county_fips') %>%
plyr::join(df_us_pers, by='county_fips') %>%
drop_na()
# save df
df_us_death %>%
#select(county_fips, case_rate, death_rate) %>%
write_csv('us_death_fips_controls.csv')
easter <- seq.Date(as.Date('2020-04-10'), as.Date('2020-04-13'), 1)
df_us_loess <- df_us_socdist %>%
mutate(weekday = format(date, '%u')) %>%
filter(!weekday %in% c('6','7') | date %in% easter) %>%
split(.$county_fips) %>%
map(~ loess(socdist_single_tile ~ time, data = .)) %>%
map(predict, 1:max(df_us_socdist$time)) %>%
bind_rows() %>%
gather(key = 'county_fips', value = 'loess') %>%
group_by(county_fips) %>%
mutate(time = row_number())
df_us_loess_2 <- df_us_socdist %>%
mutate(weekday = format(date, '%u')) %>%
filter(!weekday %in% c('6','7') | date %in% easter) %>%
split(.$county_fips) %>%
map(~ loess(socdist_tiles ~ time, data = .)) %>%
map(predict, 1:max(df_us_socdist$time)) %>%
bind_rows() %>%
gather(key = 'county_fips', value = 'loess') %>%
rename(loess_2 = loess) %>%
group_by(county_fips) %>%
mutate(time = row_number())
df_us_socdist <- df_us_socdist %>%
merge(df_us_loess, by=c('county_fips', 'time')) %>%
merge(df_us_loess_2, by=c('county_fips', 'time')) %>%
mutate(weekday = format(date, '%u')) %>%
mutate(socdist_single_tile_clean = ifelse(weekday %in% c('6','7') | date %in% easter,
loess, socdist_single_tile),
socdist_tiles_clean = ifelse(weekday %in% c('6','7') | date %in% easter,
loess_2, socdist_tiles)) %>%
arrange(county_fips, time) %>%
select(-weekday)
df_us_socdist <- df_us_socdist %>% drop_na() %>% mutate(time = time-1)
# get onset day
df_us_onset_prev <- df_us_prev %>%
group_by(county_fips) %>%
filter(rate_day > 0.1) %>%
summarise(onset_prev = min(time))
# merge with county data
df_us_onset_prev <- df_us_prev %>%
select(-date, -time, -rate_day) %>%
distinct() %>%
mutate(county_fips = as.character(county_fips)) %>%
left_join(df_us_onset_prev, by = 'county_fips')
# handle censored data
df_us_onset_prev <- df_us_onset_prev %>%
mutate(event = ifelse(is.na(onset_prev), 0, 1)) %>%
mutate(onset_prev = replace_na(onset_prev, as.numeric(diff(range(df_us_prev$date)))+1))
# cut time series
df_us_prev <- df_us_prev %>%
filter(date > '2020-03-15' & date < '2020-04-15')
# extract slope prevalence
df_us_slope_prev <- df_us_prev %>%
split(.$county_fips) %>%
map(~ lm(log(rate_day+1) ~ time, data = .)) %>%
map(coef) %>%
map_dbl('time') %>%
as.data.frame() %>%
rownames_to_column('county_fips') %>%
rename(slope_prev = '.')
# merge with county data
df_us_slope_prev <- df_us_onset_prev %>%
inner_join(df_us_slope_prev, by = 'county_fips')
# get unscaled object for descriptives
df_us_prev_desc <- df_us_slope_prev
# cut time series before onset
df_us_slope_var <- df_us_prev %>%
filter(date <= '2020-05-15') %>%
group_by(county_fips) %>%
filter(rate_day > 0.1) %>%
mutate(time = time-min(time)+1) %>%
ungroup() %>%
filter(time <= 30)
# extract slope prevalence
df_us_slope_var <- df_us_slope_var %>%
split(.$county_fips) %>%
map(~ lm(log(rate_day+1) ~ time, data = .)) %>%
map(coef) %>%
map_dbl('time') %>%
as.data.frame() %>%
rownames_to_column('county_fips') %>%
rename(slope_prev_var = '.')
# merge with county data
df_us_slope_prev <- df_us_slope_prev %>%
left_join(df_us_slope_var, by = 'county_fips') %>%
mutate(slope_prev_var = replace_na(slope_prev_var, 0))
df_us_prev_unscaled <- inner_join(df_us_slope_prev, df_us_cpt_socdist %>% select(county_fips), by='county_fips')
df_us_death_unscaled <- inner_join(df_us_death, df_us_cpt_socdist %>% select(county_fips), by='county_fips')
df_us_socdist_unscaled <- inner_join(df_us_cpt_socdist, df_us_slope_prev %>% select(county_fips), by='county_fips')
fips_list <- df_us_prev_unscaled %>% select(county_fips)
df_us_prev_unscaled %>% merge(df_us_death_unscaled %>% select(county_fips:death_rate_0930)) %>%
merge(df_us_socdist_unscaled %>% select(county_fips, cpt_day_socdist_2, mean_diff_socdist_2)) %>%
select(-event) %>% write_csv('us_all_variables.csv')
df_us_prev_scaled <- df_us_prev_unscaled %>%
mutate_at(vars(-county_fips, -event), scale)
df_us_death_scaled <- df_us_death_unscaled %>%
mutate_at(vars(-county_fips), scale)
df_us_socdist_scaled <- df_us_socdist_unscaled %>%
mutate_at(vars(-county_fips, -event), scale)
# create UDF to calculate lagged variables
calc_lags <- function(df, weights, cols){
cols_only <- df %>% select(cols)
cols_lag <- weights %*% as.matrix(cols_only) %>%
as.matrix() %>% as.data.frame()
names(cols_lag) <- paste0(names(cols_lag), '_lag')
return(cols_lag)
}
# read_weight matrix
weight_mat_norm <- read_csv('df_us_spatial_weights_matrix.csv')
##
## -- Column specification --------------------------------------------------------------------
## cols(
## .default = col_double()
## )
## i Use `spec()` for the full column specifications.
weight_mat_norm <- weight_mat_norm %>% select(-county_fips) %>% as.matrix()
# generate spatially lagged y
y_only <- df_us_prev_scaled %>% select(onset_prev,slope_prev,slope_prev_var) %>% names()
y_lag <- calc_lags(df_us_prev_scaled, weight_mat_norm, y_only)
# generate spatially lagged X
X_only <- df_us_prev_scaled %>% select(airport_dist:pers_n) %>% names()
X_lag <- calc_lags(df_us_prev_scaled, weight_mat_norm, X_only)
# bind new variables to df
df_us_prev_scaled <- cbind(df_us_prev_scaled, y_lag, X_lag)
# generate spatially lagged y
y_only <- df_us_death_scaled %>% select(case_rate_0415:death_rate_0930) %>% names()
y_lag <- calc_lags(df_us_death_scaled, weight_mat_norm, y_only)
# generate spatially lagged X
X_only <- df_us_death_scaled %>% select(airport_dist:pers_n) %>% names()
X_lag <- calc_lags(df_us_death_scaled, weight_mat_norm, X_only)
# bind new variables to df
df_us_death_scaled <- cbind(df_us_death_scaled, y_lag, X_lag)
# generate spatially lagged y
y_only <- df_us_socdist_scaled %>% select(cpt_day_socdist_2,mean_diff_socdist_2) %>% names()
y_lag <- calc_lags(df_us_socdist_scaled, weight_mat_norm, y_only)
# generate spatially lagged X
X_only <- df_us_socdist_scaled %>% select(airport_dist:pers_n) %>% names()
X_lag <- calc_lags(df_us_socdist_scaled, weight_mat_norm, X_only)
# bind new variables to df
df_us_socdist_scaled <- cbind(df_us_socdist_scaled, y_lag, X_lag)
write_csv(df_us_prev_scaled, 'df_us_slope_prev.csv')
write_csv(df_us_death_scaled, 'df_us_death_scaled.csv')
write_csv(df_us_socdist_scaled, 'df_us_cpt_socdist.csv')
# define function to calculate specification curve analyses for all traits
spec_calculate <- function(df, y, model, controls, all.comb = T){
spec_results_o <- run_specs(df = df,
y = c(y), x = c("pers_o"),
model = c(model), controls = controls,
all.comb = all.comb)
spec_results_c <- run_specs(df = df,
y = c(y), x = c("pers_c"),
model = c(model), controls = controls,
all.comb = all.comb)
spec_results_e <- run_specs(df = df,
y = c(y), x = c("pers_e"),
model = c(model), controls = controls,
all.comb = all.comb)
spec_results_a <- run_specs(df = df,
y = c(y), x = c("pers_a"),
model = c(model), controls = controls,
all.comb = all.comb)
spec_results_n <- run_specs(df = df,
y = c(y), x = c("pers_n"),
model = c(model), controls = controls,
all.comb = all.comb)
spec_results <- list(spec_results_o, spec_results_c, spec_results_e,
spec_results_a, spec_results_n)
names(spec_results) <- list('spec_results_o', 'spec_results_c', 'spec_results_e',
'spec_results_a', 'spec_results_n')
return(spec_results)
}
# adapted from specr package code
format_results <- function(df, var, null = 0, desc = FALSE) {
# rank specs
if (isFALSE(desc)) {
df <- df %>%
dplyr::arrange(!! var)
} else {
df <- df %>%
dplyr::arrange(desc(!! var))
}
# create rank variable and color significance
df <- df %>%
dplyr::mutate(specifications = 1:nrow(df),
color = case_when(conf.low > null ~ "#377eb8",
conf.high < null ~ "#e41a1c",
TRUE ~ "darkgrey"))
return(df)
}
# define function to plot single specification curve
plot_curves <- function(results, filename, hr=F){
file_path_eps <- paste0("../../Plots/US/", filename,".eps")
file_path_pdf <- paste0("../../Plots/US/", filename,".pdf")
file_path_png <- paste0("../../Plots/US/", filename,".png")
if(hr==F){
results %>%
format_results(var = .$estimate, null = 0, desc = F) %>%
ggplot(aes(x = specifications,
y = estimate,
ymin = conf.low,
ymax = conf.high,
color = color)) +
geom_point(aes(color = color),
size = 1) +
theme_minimal() +
scale_color_identity() +
theme(strip.text = element_blank(),
axis.line = element_line("black", size = .5),
legend.position = "none",
panel.spacing = unit(.75, "lines"),
axis.text = element_text(colour = "black")) +
labs(x = "") +
geom_pointrange(alpha = 0.05,
size = .6,
fatten = 1) +
geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
labs(x = "", y = "standarized coefficient") +
coord_fixed(ratio = 2000, ylim = c(-0.4, 0.4)) +
theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black")) +
ggsave(file=file_path_eps, device = 'eps')+
ggsave(file=file_path_pdf, device = 'pdf')+
ggsave(file=file_path_png, device = 'png')
}else{
results %>%
format_results(var = .$estimate, null = 1, desc = F) %>%
ggplot(aes(x = specifications,
y = estimate,
ymin = conf.low,
ymax = conf.high,
color = color)) +
geom_point(aes(color = color),
size = 1) +
theme_minimal() +
scale_color_identity() +
theme(strip.text = element_blank(),
axis.line = element_line("black", size = .5),
legend.position = "none",
panel.spacing = unit(.75, "lines"),
axis.text = element_text(colour = "black")) +
labs(x = "") +
geom_pointrange(alpha = 0.05,
size = .6,
fatten = 1) +
geom_hline(yintercept = 1, linetype = "dashed", color = "black") +
labs(x = "", y = "standarized coefficient") +
coord_fixed(ratio = 2000, ylim = c(0.6, 1.4)) +
theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black")) +
ggsave(file=file_path_eps, device = 'eps')+
ggsave(file=file_path_pdf, device = 'pdf')+
ggsave(file=file_path_png, device = 'png')
}
}
# define function to plot all curves in list
plot_all_curves <- function(ls, analysis, hr=F){
pers <- c('o', 'c', 'e', 'a', 'n')
filenames <- as.list(paste0(analysis, '_', pers))
hr <- as.list(rep(hr, 5))
pmap(list(ls, filenames, hr), plot_curves)
}
# define function to calculate summary stats for single specification curve
calc_summary <- function(df){
dft <- df %>% select(estimate:fit_nobs)
dft <- dft %>% mutate(significant = as.numeric(p.value<0.05),
positive = as.numeric(statistic>0),
negative = as.numeric(statistic<0),
significant_positive = as.numeric(p.value<0.05 & statistic>0),
significant_negative = as.numeric(p.value<0.05 & statistic<0))
mean_temp <- dft %>% map_if(is.numeric, mean, .else=NULL) %>% as.data.frame()
sd_temp <- dft %>% map_if(is.numeric, sd, .else=NULL) %>% as.data.frame()
df_temp <- rbind(mean_temp, sd_temp)
row.names(df_temp) <- c('mean', 'sd')
return(df_temp)
}
# define function to calculate summary stats for all curves in list
calc_all_summaries <- function(ls){
ls <- ls %>% map(calc_summary)
names(ls) <- c('pers_o', 'pers_c', 'pers_e', 'pers_a', 'pers_n')
return(ls)
}
coef_to_hr <- function(df){
df %>% mutate(conf.low = exp(estimate-1.96*std.error),
conf.high = exp(estimate+1.96*std.error),
estimate = exp(estimate))
}
filter_socdist <- function(df){
df %>% filter(str_detect(controls, 'onset_prev') &
str_detect(controls, 'slope_prev'))
}
covariates <- c("airport_dist", "conservative", 'male', 'age', 'popdens',
'manufact', 'tourism', 'academics', 'medinc', 'healthcare',
'temp')
cox_model <- function(formula, data){
formula <- as.formula(formula)
coxph(formula = formula, data = data)}
cox_onset_prev <- spec_calculate(df = df_us_prev_scaled,
y = "Surv(onset_prev, event)",
model = "cox_model",
controls = covariates %>% append('onset_prev_lag'),
all.comb = T)
cox_onset_prev_hr <- cox_onset_prev %>% map(coef_to_hr)
calc_all_summaries(cox_onset_prev_hr)
## $pers_o
## estimate std.error statistic p.value conf.low conf.high fit_n
## mean 1.16172145 0.026014376 5.766790 0.007433825 1.10404476 1.22241719 2366
## sd 0.06206793 0.001120517 2.218639 0.040648218 0.06034861 0.06388357 0
## fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean 1917 638.2887 2.434685e-35 846.4471
## sd 0 136.8117 1.056543e-33 203.5470
## fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean 1.186354e-35 769.6777 9.006769e-36 NA
## sd 4.978832e-34 172.8896 3.775740e-34 NA
## fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean NA 0.23515525 0.9999908 0.68063283
## sd NA 0.04511866 0.0000000 0.01744978
## fit_std.error.concordance fit_logLik fit_AIC fit_BIC fit_nobs
## mean 0.0065112174 -13403.98321 26821.9664 26860.8760 2366
## sd 0.0002305437 68.40587 134.2635 127.4085 0
## significant positive negative significant_positive significant_negative
## mean 0.9633789 1 0 0.9633789 0
## sd 0.1878526 0 0 0.1878526 0
##
## $pers_c
## estimate std.error statistic p.value conf.low conf.high fit_n
## mean 1.14595693 0.023675413 5.720862 0.001312684 1.09398662 1.20039740 2366
## sd 0.04204126 0.000533867 1.536933 0.009826545 0.03991021 0.04431706 0
## fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean 1917 634.9587 6.334368e-13 836.1058
## sd 0 158.0395 4.053783e-11 223.5635
## fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean 5.061211e-13 753.4356 4.799701e-13 NA
## sd 3.238962e-11 188.4106 3.071593e-11 NA
## fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean NA 0.23363277 0.9999908 0.67911602
## sd NA 0.05275286 0.0000000 0.02191225
## fit_std.error.concordance fit_logLik fit_AIC fit_BIC fit_nobs
## mean 0.0064685052 -13405.64817 26825.2963 26864.2060 2366
## sd 0.0002280484 79.01976 155.5847 148.9729 0
## significant positive negative significant_positive significant_negative
## mean 0.99414062 1 0 0.99414062 0
## sd 0.07633129 0 0 0.07633129 0
##
## $pers_e
## estimate std.error statistic p.value conf.low conf.high fit_n
## mean 1.0053968 0.0230413250 0.2407622 0.5618723 0.96101875 1.05182497 2366
## sd 0.0210153 0.0004509163 0.9189734 0.2847874 0.02088586 0.02111908 0
## fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean 1917 601.0713 3.043490e-07 811.6531
## sd 0 156.6765 1.947733e-05 226.5546
## fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean 3.236460e-07 731.9152 3.223853e-07 NA
## sd 2.071186e-05 193.9189 2.063117e-05 NA
## fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean NA 0.22261503 0.9999908 0.67472760
## sd NA 0.05272031 0.0000000 0.02245513
## fit_std.error.concordance fit_logLik fit_AIC fit_BIC fit_nobs
## mean 0.0065180540 -13422.59188 26859.1838 26898.0934 2366
## sd 0.0002496992 78.33827 154.1384 147.2827 0
## significant positive negative significant_positive significant_negative
## mean 0.06713867 0.5056152 0.4943848 0.06713867 0
## sd 0.25029256 0.5000295 0.5000295 0.25029256 0
##
## $pers_a
## estimate std.error statistic p.value conf.low conf.high fit_n
## mean 1.12712077 0.0241346132 4.911333 0.02630132 1.07503821 1.18172942 2366
## sd 0.05222035 0.0007992203 1.927298 0.11581101 0.04966844 0.05496148 0
## fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean 1917 627.6168 7.129527e-12 832.9647
## sd 0 157.1791 4.562814e-10 223.8587
## fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean 5.323420e-12 751.675 5.166855e-12 NA
## sd 3.406905e-10 190.272 3.306702e-10 NA
## fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean NA 0.23126981 0.9999908 0.67999298
## sd NA 0.05263276 0.0000000 0.02169922
## fit_std.error.concordance fit_logLik fit_AIC fit_BIC fit_nobs
## mean 0.0064707427 -13409.31914 26832.6383 26871.5479 2366
## sd 0.0002249616 78.58953 154.7361 148.1604 0
## significant positive negative significant_positive
## mean 0.9313965 0.99462891 0.005371094 0.9313965
## sd 0.2528096 0.07309959 0.073099587 0.2528096
## significant_negative
## mean 0
## sd 0
##
## $pers_n
## estimate std.error statistic p.value conf.low conf.high fit_n
## mean 0.87994919 0.0254283747 -5.139354 0.006040369 0.83711030 0.92498283 2366
## sd 0.04229444 0.0008502208 2.109454 0.019840097 0.03905764 0.04576956 0
## fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean 1917 631.3160 1.042710e-34 830.4507
## sd 0 135.0754 6.242899e-33 207.2895
## fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean 8.175495e-34 746.0484 4.523746e-34 NA
## sd 4.972358e-32 170.8610 2.735136e-32 NA
## fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean NA 0.23293232 0.9999908 0.67921943
## sd NA 0.04458783 0.0000000 0.01770616
## fit_std.error.concordance fit_logLik fit_AIC fit_BIC fit_nobs
## mean 0.0064412561 -13407.4695 26828.939 26867.8487 2366
## sd 0.0001869198 67.5377 132.492 125.5353 0
## significant positive negative significant_positive significant_negative
## mean 0.9619141 0 1 0 0.9619141
## sd 0.1914271 0 0 0 0.1914271
plot_all_curves(cox_onset_prev_hr, 'cox_onset_prev_hr', hr = T)
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
cox_onset_prev_ctrl <- coxph(Surv(onset_prev, event) ~ airport_dist +
conservative + male + age + popdens + manufact +
tourism + academics + medinc + healthcare +
temp + onset_prev_lag,
data = df_us_prev_scaled)
summary(cox_onset_prev_ctrl)
## Call:
## coxph(formula = Surv(onset_prev, event) ~ airport_dist + conservative +
## male + age + popdens + manufact + tourism + academics + medinc +
## healthcare + temp + onset_prev_lag, data = df_us_prev_scaled)
##
## n= 2366, number of events= 1917
##
## coef exp(coef) se(coef) z Pr(>|z|)
## airport_dist -0.06929 0.93306 0.02959 -2.341 0.019219 *
## conservative -0.26778 0.76508 0.02758 -9.709 < 2e-16 ***
## male -0.10249 0.90259 0.02677 -3.828 0.000129 ***
## age -0.11380 0.89244 0.02439 -4.666 3.07e-06 ***
## popdens 0.03785 1.03857 0.02364 1.601 0.109371
## manufact 0.10299 1.10848 0.02806 3.671 0.000242 ***
## tourism 0.05239 1.05379 0.02662 1.968 0.049029 *
## academics 0.17781 1.19460 0.04482 3.968 7.26e-05 ***
## medinc 0.33067 1.39190 0.03620 9.134 < 2e-16 ***
## healthcare 0.11392 1.12067 0.02693 4.230 2.34e-05 ***
## temp 0.32833 1.38865 0.03005 10.926 < 2e-16 ***
## onset_prev_lag -0.31413 0.73042 0.04236 -7.416 1.21e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## airport_dist 0.9331 1.0717 0.8805 0.9888
## conservative 0.7651 1.3071 0.7248 0.8076
## male 0.9026 1.1079 0.8565 0.9512
## age 0.8924 1.1205 0.8508 0.9361
## popdens 1.0386 0.9629 0.9916 1.0878
## manufact 1.1085 0.9021 1.0492 1.1712
## tourism 1.0538 0.9490 1.0002 1.1102
## academics 1.1946 0.8371 1.0941 1.3043
## medinc 1.3919 0.7184 1.2966 1.4942
## healthcare 1.1207 0.8923 1.0630 1.1814
## temp 1.3886 0.7201 1.3092 1.4729
## onset_prev_lag 0.7304 1.3691 0.6722 0.7937
##
## Concordance= 0.709 (se = 0.006 )
## Likelihood ratio test= 896.6 on 12 df, p=<2e-16
## Wald test = 1059 on 12 df, p=<2e-16
## Score (logrank) test = 1203 on 12 df, p=<2e-16
# fixed time windows
lm_slope_prev <- spec_calculate(df = df_us_prev_scaled,
y = "slope_prev",
model = "lm",
controls = covariates %>% append('slope_prev_lag'),
all.comb = T)
calc_all_summaries(lm_slope_prev)
## $pers_o
## estimate std.error statistic p.value conf.low conf.high
## mean 0.06650710 0.0218723943 3.051989 0.1060491 0.02361598 0.1093982
## sd 0.03848726 0.0007661755 1.803760 0.2188002 0.03867878 0.0383537
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.16877711 0.16632951 0.9128307 70.25183 1.477756e-24
## sd 0.03743269 0.03708676 0.0202677 14.16632 5.895233e-23
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3136.82893 6291.6579 6343.57847 1965.8421 2358.000000
## sd 1.732262 53.08913 103.6386 96.63557 88.5283 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.7268066 0.9838867 0.01611328 0.7268066
## sd 0 0.4456537 0.1259266 0.12592662 0.4456537
## significant_negative
## mean 0
## sd 0
##
## $pers_c
## estimate std.error statistic p.value conf.low conf.high
## mean 0.08159129 0.0195674938 4.182360 0.01737430 0.04322001 0.11996257
## sd 0.02905970 0.0004474735 1.515199 0.06422916 0.02934306 0.02880029
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.17120959 0.16877025 0.91146684 71.33624 8.942856e-10
## sd 0.03958576 0.03924605 0.02140208 15.26681 4.423700e-08
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3133.22547 6284.4509 6336.3715 1960.08931 2358.000000
## sd 1.732262 55.91484 109.2984 102.3039 93.62033 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.9228516 1 0 0.9228516
## sd 0 0.2668594 0 0 0.2668594
## significant_negative
## mean 0
## sd 0
##
## $pers_e
## estimate std.error statistic p.value conf.low conf.high
## mean 0.00674942 0.0191251166 0.3431732 0.6112414 -0.03075437 0.04425321
## sd 0.01393104 0.0004257969 0.7179348 0.2737243 0.01343339 0.01445982
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.16442606 0.16196752 0.91517150 67.74067 6.606324e-07
## sd 0.04118872 0.04084978 0.02221979 15.14588 3.531075e-05
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3142.77640 6303.5528 6355.4734 1976.13237 2358.000000
## sd 1.732262 57.88175 113.2271 106.2052 97.41132 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.02978516 0.6547852 0.3452148 0.02978516
## sd 0 0.17001487 0.4754963 0.4754963 0.17001487
## significant_negative
## mean 0
## sd 0
##
## $pers_a
## estimate std.error statistic p.value conf.low conf.high
## mean 0.11095509 0.0199702382 5.583480 0.006947667 0.07179404 0.15011614
## sd 0.03633964 0.0005199555 1.888186 0.042481328 0.03686347 0.03583717
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.17639488 0.17397035 0.90861824 74.12927 4.806967e-13
## sd 0.03882516 0.03848915 0.02103938 15.64261 2.739705e-11
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3125.83790 6269.6758 6321.5964 1947.82611 2358.000000
## sd 1.732262 55.10243 107.6935 100.7668 91.82149 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.9714355 1 0 0.9714355
## sd 0 0.1665992 0 0 0.1665992
## significant_negative
## mean 0
## sd 0
##
## $pers_n
## estimate std.error statistic p.value conf.low conf.high
## mean -0.06960205 0.0203062105 -3.442200 0.1089957 -0.10942193 -0.02978217
## sd 0.03665124 0.0004166468 1.838027 0.2464262 0.03637535 0.03694315
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.16969364 0.16724876 0.91232268 70.72756 9.620287e-22
## sd 0.03779965 0.03745861 0.02046116 14.60225 5.487729e-20
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3135.5009 6289.0018 6340.92240 1963.67454 2358.000000
## sd 1.732262 53.5602 104.6036 97.66964 89.39616 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.8081055 0.02587891 0.9741211 0
## sd 0 0.3938387 0.15879340 0.1587934 0
## significant_negative
## mean 0.8081055
## sd 0.3938387
plot_all_curves(lm_slope_prev, 'lm_slope_prev')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
lm_slope_prev_ctrl <- lm(slope_prev ~ airport_dist +
conservative + male + age + popdens + manufact +
tourism + academics + medinc + healthcare +
temp + slope_prev_lag,
data = df_us_prev_scaled)
summary(lm_slope_prev_ctrl)
##
## Call:
## lm(formula = slope_prev ~ airport_dist + conservative + male +
## age + popdens + manufact + tourism + academics + medinc +
## healthcare + temp + slope_prev_lag, data = df_us_prev_scaled)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.9236 -0.4960 -0.2000 0.2154 6.0413
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.003469 0.017857 -0.194 0.84600
## airport_dist 0.008823 0.021113 0.418 0.67606
## conservative -0.247126 0.022130 -11.167 < 2e-16 ***
## male -0.080222 0.019710 -4.070 4.85e-05 ***
## age -0.052478 0.019239 -2.728 0.00642 **
## popdens 0.139040 0.020596 6.751 1.85e-11 ***
## manufact 0.006652 0.022098 0.301 0.76342
## tourism -0.008209 0.020752 -0.396 0.69245
## academics -0.084176 0.038229 -2.202 0.02777 *
## medinc 0.263901 0.030260 8.721 < 2e-16 ***
## healthcare 0.041049 0.023823 1.723 0.08500 .
## temp 0.219069 0.023035 9.510 < 2e-16 ***
## slope_prev_lag 0.303142 0.031452 9.638 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8684 on 2353 degrees of freedom
## Multiple R-squared: 0.2497, Adjusted R-squared: 0.2458
## F-statistic: 65.25 on 12 and 2353 DF, p-value: < 2.2e-16
# variable time windows
lm_slope_prev_var <- spec_calculate(df = df_us_prev_scaled,
y = "slope_prev_var",
model = "lm",
controls = covariates %>% append('slope_prev_var_lag'),
all.comb = T)
calc_all_summaries(lm_slope_prev_var)
## $pers_o
## estimate std.error statistic p.value conf.low conf.high
## mean 0.01843360 0.0228448236 0.8092564 0.3374739 -0.02636443 0.06323163
## sd 0.03179557 0.0008021976 1.4119922 0.2928829 0.03186131 0.03180759
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.09368869 0.09101223 0.95330359 35.311428 4.358225e-09
## sd 0.02737264 0.02697465 0.01414535 8.558473 1.601068e-07
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3239.79451 6497.58901 6549.50962 2143.4263 2358.000000
## sd 1.732262 35.73047 69.01074 62.48346 64.7363 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.2021484 0.7170410 0.2829590 0.1867676
## sd 0 0.4016514 0.4504917 0.4504917 0.3897724
## significant_negative
## mean 0.01538086
## sd 0.12307716
##
## $pers_c
## estimate std.error statistic p.value conf.low conf.high
## mean 0.09199460 0.0203745098 4.529303 0.003265235 0.05204079 0.13194841
## sd 0.02635443 0.0003465441 1.337233 0.009768975 0.02675594 0.02596451
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.10121874 0.09856375 0.94935036 38.710210 2.822507e-11
## sd 0.02521521 0.02480452 0.01304579 8.221666 1.221468e-09
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3230.00003 6478.00006 6529.92067 2125.61769 2358.000000
## sd 1.732262 33.10894 63.71753 57.07268 59.63398 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.9880371 1 0 0.9880371
## sd 0 0.1087321 0 0 0.1087321
## significant_negative
## mean 0
## sd 0
##
## $pers_e
## estimate std.error statistic p.value conf.low conf.high
## mean -0.007067026 0.0199332406 -0.3585409 0.6301136 -0.046155523 0.03202147
## sd 0.010169162 0.0002821194 0.5126193 0.2278677 0.009878615 0.01048088
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.09279622 0.09011762 0.95376562 34.846983 0.0001193256
## sd 0.02829475 0.02789763 0.01461407 8.898946 0.0044740979
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3240.92388 6499.84776 6551.76837 2145.53694 2358.000000
## sd 1.732262 36.85796 71.26061 64.69776 66.91708 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0 0.2226562 0.7773438 0
## sd 0 0 0.4160802 0.4160802 0
## significant_negative
## mean 0
## sd 0
##
## $pers_a
## estimate std.error statistic p.value conf.low conf.high
## mean 0.11504837 0.0208132613 5.555493 0.00127825 0.07423418 0.15586257
## sd 0.03195153 0.0004849158 1.619015 0.00604934 0.03262194 0.03129565
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.10543597 0.10279285 0.94712809 40.680990 8.095085e-15
## sd 0.02407654 0.02366633 0.01247244 8.311559 3.982934e-13
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3224.47345 6466.94691 6518.86751 2115.64393 2358.000000
## sd 1.732262 31.74214 60.99344 54.40905 56.94102 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.99584961 1 0 0.99584961
## sd 0 0.06429754 0 0 0.06429754
## significant_negative
## mean 0
## sd 0
##
## $pers_n
## estimate std.error statistic p.value conf.low conf.high
## mean -0.04812700 0.0211994748 -2.285890 0.1692657 -0.08969855 -0.006555458
## sd 0.03063361 0.0004145531 1.471557 0.2877701 0.03022191 0.031061141
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.09553382 0.09286226 0.95234100 36.196044 9.356756e-12
## sd 0.02627732 0.02587729 0.01357951 8.357229 3.360019e-10
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3237.42430 6492.84860 6544.76920 2139.06251 2358.000000
## sd 1.732262 34.34926 66.24897 59.74882 62.14587 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.6511230 0.1003418 0.8996582 0
## sd 0 0.4766732 0.3004919 0.3004919 0
## significant_negative
## mean 0.6511230
## sd 0.4766732
plot_all_curves(lm_slope_prev_var, 'lm_slope_prev_var')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
lm_slope_prev_var_ctrl <- lm(slope_prev_var ~ airport_dist +
conservative + male + age + popdens + manufact +
tourism + academics + medinc + healthcare +
temp + slope_prev_var_lag,
data = df_us_prev_scaled)
summary(lm_slope_prev_var_ctrl)
##
## Call:
## lm(formula = slope_prev_var ~ airport_dist + conservative + male +
## age + popdens + manufact + tourism + academics + medinc +
## healthcare + temp + slope_prev_var_lag, data = df_us_prev_scaled)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1372 -0.5606 -0.2239 0.3000 9.4699
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.002831 0.018948 -0.149 0.881224
## airport_dist 0.021432 0.022402 0.957 0.338819
## conservative -0.223392 0.023463 -9.521 < 2e-16 ***
## male -0.070245 0.020914 -3.359 0.000795 ***
## age -0.060962 0.020410 -2.987 0.002847 **
## popdens 0.111035 0.021855 5.081 4.06e-07 ***
## manufact 0.037805 0.023416 1.614 0.106557
## tourism -0.026901 0.022017 -1.222 0.221893
## academics -0.131707 0.040554 -3.248 0.001180 **
## medinc 0.213479 0.032092 6.652 3.58e-11 ***
## healthcare 0.032853 0.025277 1.300 0.193814
## temp 0.222065 0.024472 9.074 < 2e-16 ***
## slope_prev_var_lag 0.235435 0.034511 6.822 1.14e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9214 on 2353 degrees of freedom
## Multiple R-squared: 0.1553, Adjusted R-squared: 0.151
## F-statistic: 36.04 on 12 and 2353 DF, p-value: < 2.2e-16
lm_cases_0415 <- spec_calculate(df = df_us_death_scaled,
y = "case_rate_0415",
model = "lm",
controls = covariates %>% append('case_rate_0415_lag'),
all.comb = T)
calc_all_summaries(lm_cases_0415)
## $pers_o
## estimate std.error statistic p.value conf.low conf.high
## mean 0.04876701 0.0227720892 2.150331 0.1735715 0.004111615 0.09342241
## sd 0.03165959 0.0007998025 1.423872 0.2427764 0.031873299 0.03152256
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.09943230 0.09677125 0.95027531 38.13421 1.473910e-15
## sd 0.02766227 0.02732592 0.01433976 10.44299 4.683503e-14
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3232.25867 6482.51733 6534.43794 2129.84261 2358.000000
## sd 1.732262 36.16042 70.17978 64.65003 65.42127 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.4975586 0.9543457 0.0456543 0.4975586
## sd 0 0.5000551 0.2087597 0.2087597 0.5000551
## significant_negative
## mean 0
## sd 0
##
## $pers_c
## estimate std.error statistic p.value conf.low conf.high
## mean 0.04262502 0.0203947736 2.104424 0.1466779 0.002631474 0.08261857
## sd 0.02066561 0.0004616577 1.037216 0.2339358 0.021205973 0.02015145
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.09892688 0.09626521 0.95052224 37.78673 2.060162e-05
## sd 0.03004164 0.02971634 0.01556665 11.45476 9.415328e-04
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3232.82626 6483.65252 6535.57312 2131.03793 2358.000000
## sd 1.732262 39.13348 76.15729 70.67325 71.04847 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.6054688 0.9758301 0.02416992 0.6054688
## sd 0 0.4888094 0.1535952 0.15359524 0.4888094
## significant_negative
## mean 0
## sd 0
##
## $pers_e
## estimate std.error statistic p.value conf.low conf.high
## mean 0.012073121 0.0198842257 0.6045850 0.5663104 -0.026919260 0.051065502
## sd 0.009551023 0.0003069414 0.4745963 0.2407368 0.009344157 0.009790576
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.09705206 0.09438499 0.95151078 36.93931 0.0000119453
## sd 0.03006737 0.02973723 0.01556634 11.25001 0.0005722379
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3235.28615 6488.5723 6540.49291 2135.47188 2358.000000
## sd 1.732262 39.11061 76.0876 70.52753 71.10934 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.01464844 0.8945312 0.1054688 0.01464844
## sd 0 0.12015567 0.3071940 0.3071940 0.12015567
## significant_negative
## mean 0
## sd 0
##
## $pers_a
## estimate std.error statistic p.value conf.low conf.high
## mean 0.07795076 0.020834282 3.766163 0.02775633 0.03709534 0.11880617
## sd 0.02637373 0.000592395 1.323556 0.09913529 0.02709396 0.02568587
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.1028759 0.1002257 0.94843600 39.58762 1.825308e-07
## sd 0.0300417 0.0297263 0.01560479 11.81480 7.034892e-06
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3227.62466 6473.24933 6525.16993 2121.69844 2358.000000
## sd 1.732262 39.29875 76.53274 71.18872 71.04863 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.9033203 0.99951172 0.0004882812 0.9033203
## sd 0 0.2955572 0.02209439 0.0220943887 0.2955572
## significant_negative
## mean 0
## sd 0
##
## $pers_n
## estimate std.error statistic p.value conf.low conf.high
## mean -0.04774835 0.0211530060 -2.276129 0.1478689 -0.08922878 -0.006267932
## sd 0.02663904 0.0004849494 1.291895 0.2612650 0.02607382 0.027225747
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.09947116 0.09681049 0.9502443 38.13183 2.304518e-12
## sd 0.02894177 0.02861880 0.0150099 11.11191 1.129695e-10
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3232.1560 6482.31207 6534.233 2129.75070 2358.000000
## sd 1.732262 37.7909 73.49507 68.108 68.44729 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.6376953 0.05932617 0.9406738 0
## sd 0 0.4807249 0.23626300 0.2362630 0
## significant_negative
## mean 0.6376953
## sd 0.4807249
plot_all_curves(lm_cases_0415, 'lm_cases_0415')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
lm_cases_0415_ctrl <- lm(case_rate_0415 ~ airport_dist +
conservative + male + age + popdens + manufact +
tourism + academics + medinc + healthcare +
temp + case_rate_0415_lag,
data = df_us_death_scaled)
summary(lm_cases_0415_ctrl)
##
## Call:
## lm(formula = case_rate_0415 ~ airport_dist + conservative + male +
## age + popdens + manufact + tourism + academics + medinc +
## healthcare + temp + case_rate_0415_lag, data = df_us_death_scaled)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.9724 -0.3185 -0.1425 0.0664 14.0442
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.002321 0.018957 -0.122 0.902545
## airport_dist 0.039999 0.022414 1.785 0.074469 .
## conservative -0.192255 0.023487 -8.186 4.38e-16 ***
## male -0.054249 0.020924 -2.593 0.009582 **
## age -0.015480 0.020419 -0.758 0.448449
## popdens 0.197538 0.021860 9.037 < 2e-16 ***
## manufact -0.014696 0.023468 -0.626 0.531255
## tourism -0.015331 0.022028 -0.696 0.486509
## academics -0.148486 0.040571 -3.660 0.000258 ***
## medinc 0.242370 0.032107 7.549 6.24e-14 ***
## healthcare 0.034985 0.025290 1.383 0.166693
## temp 0.155413 0.024405 6.368 2.29e-10 ***
## case_rate_0415_lag 0.187843 0.034440 5.454 5.43e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9219 on 2353 degrees of freedom
## Multiple R-squared: 0.1545, Adjusted R-squared: 0.1502
## F-statistic: 35.83 on 12 and 2353 DF, p-value: < 2.2e-16
lm_cases_0930 <- spec_calculate(df = df_us_death_scaled,
y = "case_rate_0930",
model = "lm",
controls = covariates %>% append('case_rate_0930_lag'),
all.comb = T)
calc_all_summaries(lm_cases_0930)
## $pers_o
## estimate std.error statistic p.value conf.low conf.high
## mean -0.08748817 0.020625280 -4.336426 0.06828373 -0.12793374 -0.04704261
## sd 0.04752857 0.001548285 2.489699 0.17839331 0.04636929 0.04884932
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.2576031 0.2554435 0.86051765 125.08352 1.479292e-08
## sd 0.1115422 0.1115596 0.06377168 61.40988 4.197068e-07
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -2991.3823 6000.765 6052.6852 1755.7688 2358.000000
## sd 1.732262 173.8364 345.775 340.4391 263.7973 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.8127441 0.01391602 0.9860840 0
## sd 0 0.3901644 0.11715678 0.1171568 0
## significant_negative
## mean 0.8127441
## sd 0.3901644
##
## $pers_c
## estimate std.error statistic p.value conf.low conf.high
## mean 0.12308312 0.0184266866 6.583651 0.001060154 0.08694894 0.1592173
## sd 0.05683318 0.0009247264 2.811747 0.004025125 0.05550506 0.0581875
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.26752476 0.26538940 0.85530837 130.36518 3.170247e-32
## sd 0.09585838 0.09583076 0.05530759 53.15408 6.694621e-31
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -2978.5255 5975.0511 6026.9717 1732.3039 2358.000000
## sd 1.732262 152.1392 302.3326 296.8751 226.7051 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 1 1 0 1
## sd 0 0 0 0 0
## significant_negative
## mean 0
## sd 0
##
## $pers_e
## estimate std.error statistic p.value conf.low conf.high
## mean 0.06069173 0.018035486 3.3280702 0.01604293 0.02532467 0.09605878
## sd 0.01982743 0.001245754 0.9305813 0.04801242 0.01817235 0.02163228
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.2542249 0.2520547 0.8626706 122.18282 5.151662e-06
## sd 0.1072672 0.1072635 0.0612009 57.77144 9.231320e-05
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -2997.8262 6013.6525 6065.5731 1763.7582 2358.000000
## sd 1.732262 166.5089 331.0699 325.5924 253.6869 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.9135742 1 0 0.9135742
## sd 0 0.2810261 0 0 0.2810261
## significant_negative
## mean 0
## sd 0
##
## $pers_a
## estimate std.error statistic p.value conf.low conf.high
## mean 0.20856723 0.018548064 11.178947 1.455611e-10 0.17219502 0.24493944
## sd 0.06258162 0.000809816 3.068922 1.114284e-09 0.06164063 0.06354836
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.29257675 0.29051179 0.84079681 147.40723 4.922386e-58
## sd 0.08593704 0.08589113 0.05049305 51.53961 1.583845e-56
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -2938.7056 5895.4112 5947.3318 1673.0560 2358.000000
## sd 1.732262 141.4434 280.9384 275.4853 203.2411 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 1 1 0 1
## sd 0 0 0 0 0
## significant_negative
## mean 0
## sd 0
##
## $pers_n
## estimate std.error statistic p.value conf.low conf.high
## mean -0.18517486 0.018911721 -9.704595 1.720658e-06 -0.22226019 -0.14808953
## sd 0.05329514 0.001037646 2.400281 2.810106e-05 0.05486468 0.05175801
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.28204011 0.27994829 0.84680190 139.91098 8.135163e-27
## sd 0.09375431 0.09371317 0.05458002 53.56275 2.424387e-25
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -2954.9142 5927.8285 5979.7491 1697.9751 2358.000000
## sd 1.732262 151.5238 301.0411 295.4047 221.7289 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 1 0 1 0
## sd 0 0 0 0 0
## significant_negative
## mean 1
## sd 0
plot_all_curves(lm_cases_0930, 'lm_cases_0930')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
lm_cases_0930_ctrl <- lm(case_rate_0930 ~ airport_dist +
conservative + male + age + popdens + manufact +
tourism + academics + medinc + healthcare +
temp + case_rate_0930_lag,
data = df_us_death_scaled)
summary(lm_cases_0930_ctrl)
##
## Call:
## lm(formula = case_rate_0930 ~ airport_dist + conservative + male +
## age + popdens + manufact + tourism + academics + medinc +
## healthcare + temp + case_rate_0930_lag, data = df_us_death_scaled)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5977 -0.4787 -0.1204 0.3630 6.7757
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.001975 0.015604 -0.127 0.89929
## airport_dist 0.119021 0.018459 6.448 1.37e-10 ***
## conservative -0.214833 0.019372 -11.090 < 2e-16 ***
## male 0.152308 0.017222 8.844 < 2e-16 ***
## age -0.293421 0.016809 -17.457 < 2e-16 ***
## popdens 0.009099 0.017999 0.506 0.61323
## manufact 0.173597 0.019327 8.982 < 2e-16 ***
## tourism -0.021363 0.018148 -1.177 0.23923
## academics -0.107078 0.033555 -3.191 0.00144 **
## medinc -0.025005 0.026524 -0.943 0.34592
## healthcare 0.066311 0.020818 3.185 0.00147 **
## temp 0.483630 0.021645 22.344 < 2e-16 ***
## case_rate_0930_lag 0.226178 0.028937 7.816 8.14e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7589 on 2353 degrees of freedom
## Multiple R-squared: 0.427, Adjusted R-squared: 0.4241
## F-statistic: 146.1 on 12 and 2353 DF, p-value: < 2.2e-16
lm_deaths_0415 <- spec_calculate(df = df_us_death_scaled,
y = "death_rate_0415",
model = "lm",
controls = covariates %>% append('death_rate_0415_lag'),
all.comb = T)
calc_all_summaries(lm_deaths_0415)
## $pers_o
## estimate std.error statistic p.value conf.low conf.high
## mean 0.02159386 0.0232443310 0.9352266 0.3927249 -0.02398759 0.06717531
## sd 0.02622739 0.0008315074 1.1494376 0.3084098 0.02646229 0.02609246
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.06227496 0.05950046 0.969742909 22.662019 8.001974e-08
## sd 0.01965118 0.01924889 0.009912406 6.298663 2.447263e-06
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3280.3844 6578.76886 6630.68946 2217.71972 2358.000000
## sd 1.732262 24.7346 47.20338 41.60315 46.47504 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.1784668 0.7832031 0.2167969 0.1784668
## sd 0 0.3829520 0.4121134 0.4121134 0.3829520
## significant_negative
## mean 0
## sd 0
##
## $pers_c
## estimate std.error statistic p.value conf.low conf.high
## mean 0.04372995 0.0207956843 2.1162652 0.1315361 0.002950226 0.08450968
## sd 0.01888260 0.0004178388 0.9341132 0.2095286 0.019457170 0.01832666
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.06352029 0.06074950 0.96909709 23.161227 6.476435e-06
## sd 0.01994210 0.01954641 0.01006702 6.585468 2.596939e-04
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3278.80427 6575.60854 6627.52914 2214.77451 2358.000000
## sd 1.732262 25.10753 47.97863 42.46171 47.16306 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.6140137 0.99804688 0.001953125 0.6140137
## sd 0 0.4868868 0.04415638 0.044156385 0.4868868
## significant_negative
## mean 0
## sd 0
##
## $pers_e
## estimate std.error statistic p.value conf.low conf.high
## mean -0.009297587 0.0202759542 -0.4597789 0.6280569 -0.049058136 0.03046296
## sd 0.007132299 0.0002130225 0.3530586 0.1908608 0.007010183 0.00727638
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.06151788 0.05874158 0.9701292 22.284747 0.0002391457
## sd 0.02056854 0.02016790 0.0103773 6.638708 0.0091204303
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3281.31517 6580.63034 6632.55094 2219.51021 2358.000000
## sd 1.732262 25.84772 49.42726 43.77151 48.64461 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0 0.08740234 0.9125977 0
## sd 0 0 0.28245823 0.2824582 0
## significant_negative
## mean 0
## sd 0
##
## $pers_a
## estimate std.error statistic p.value conf.low conf.high
## mean 0.08059878 0.0212479128 3.816159 0.01274672 0.03893225 0.12226531
## sd 0.02281645 0.0005825912 1.139402 0.04100308 0.02361434 0.02204891
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.06766920 0.06491029 0.966950106 24.935697 1.104777e-08
## sd 0.01948835 0.01910418 0.009860482 6.747553 4.400887e-07
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3273.56134 6565.12267 6617.04328 2204.96235 2358.000000
## sd 1.732262 24.64179 47.10499 41.80496 46.08995 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.9294434 1 0 0.9294434
## sd 0 0.2561141 0 0 0.2561141
## significant_negative
## mean 0
## sd 0
##
## $pers_n
## estimate std.error statistic p.value conf.low conf.high
## mean -0.01275703 0.0215914703 -0.6063945 0.3820066 -0.05509727 0.02958321
## sd 0.02276319 0.0004845586 1.0615056 0.2740656 0.02216699 0.02338281
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.06198511 0.05920994 0.96988961 22.517221 1.683153e-06
## sd 0.02023050 0.01983336 0.01020926 6.553321 3.977900e-05
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3280.73490 6579.46980 6631.3904 2218.40522 2358.000000
## sd 1.732262 25.44329 48.63943 43.0717 47.84512 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.06982422 0.2592773 0.7407227 0
## sd 0 0.25488165 0.4382916 0.4382916 0
## significant_negative
## mean 0.06982422
## sd 0.25488165
plot_all_curves(lm_deaths_0415, 'lm_deaths_0415')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
lm_deaths_0415_ctrl <- lm(death_rate_0415 ~ airport_dist +
conservative + male + age + popdens + manufact +
tourism + academics + medinc + healthcare +
temp + death_rate_0415_lag,
data = df_us_death_scaled)
summary(lm_deaths_0415_ctrl)
##
## Call:
## lm(formula = death_rate_0415 ~ airport_dist + conservative +
## male + age + popdens + manufact + tourism + academics + medinc +
## healthcare + temp + death_rate_0415_lag, data = df_us_death_scaled)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.9826 -0.3573 -0.1845 0.0510 14.3516
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.002784 0.019506 -0.143 0.88650
## airport_dist 0.030827 0.023066 1.336 0.18153
## conservative -0.164472 0.024153 -6.809 1.24e-11 ***
## male -0.058699 0.021529 -2.727 0.00645 **
## age 0.005348 0.021010 0.255 0.79911
## popdens 0.149665 0.022492 6.654 3.53e-11 ***
## manufact 0.001234 0.024105 0.051 0.95919
## tourism -0.038348 0.022667 -1.692 0.09082 .
## academics -0.118379 0.041756 -2.835 0.00462 **
## medinc 0.167830 0.033048 5.078 4.11e-07 ***
## healthcare 0.023905 0.026021 0.919 0.35835
## temp 0.152491 0.025113 6.072 1.47e-09 ***
## death_rate_0415_lag 0.227223 0.034577 6.572 6.11e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9486 on 2353 degrees of freedom
## Multiple R-squared: 0.1048, Adjusted R-squared: 0.1002
## F-statistic: 22.96 on 12 and 2353 DF, p-value: < 2.2e-16
lm_deaths_0930 <- spec_calculate(df = df_us_death_scaled,
y = "death_rate_0930",
model = "lm",
controls = covariates %>% append('death_rate_0930_lag'),
all.comb = T)
calc_all_summaries(lm_deaths_0930)
## $pers_o
## estimate std.error statistic p.value conf.low conf.high
## mean -0.04771222 0.021786652 -2.278399 0.2033106 -0.09043520 -0.004989232
## sd 0.04891033 0.001143027 2.365694 0.2823974 0.04739848 0.050476466
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.17340678 0.17098490 0.90938022 73.76279 0.0003284423
## sd 0.08211462 0.08202695 0.04520183 38.89783 0.0127438710
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3125.5125 6269.0249 6320.9455 1954.8930 2358.000000
## sd 1.732262 118.5453 235.2744 230.2473 194.2011 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.5175781 0.1660156 0.8339844 0.01733398
## sd 0 0.4997519 0.3721401 0.3721401 0.13052845
## significant_negative
## mean 0.5002441
## sd 0.5000610
##
## $pers_c
## estimate std.error statistic p.value conf.low conf.high
## mean 0.1268692 0.019400070 6.486926 0.001113984 0.08882628 0.16491221
## sd 0.0530807 0.000431052 2.614407 0.003196235 0.05230910 0.05385451
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.18772875 0.18534406 0.90182774 81.06360 3.020223e-31
## sd 0.06656885 0.06643439 0.03691875 31.69162 9.761146e-30
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3106.72700 6231.4540 6283.3746 1921.0215 2358.000000
## sd 1.732262 97.68093 193.4857 188.3267 157.4353 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 1 1 0 1
## sd 0 0 0 0 0
## significant_negative
## mean 0
## sd 0
##
## $pers_e
## estimate std.error statistic p.value conf.low conf.high
## mean 0.006814176 0.0190529070 0.3370688 0.5684355 -0.03054801 0.04417636
## sd 0.014399735 0.0008842485 0.7376599 0.2709864 0.01342749 0.01550550
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.17020480 0.16777296 0.91123005 71.84871 0.0002407612
## sd 0.07920734 0.07910241 0.04344308 36.74472 0.0097444833
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3130.5609 6279.1219 6331.0425 1962.4657 2358.000000
## sd 1.732262 113.5638 225.2666 220.1163 187.3254 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.01293945 0.6718750 0.3281250 0.01293945
## sd 0 0.11302718 0.4695879 0.4695879 0.11302718
## significant_negative
## mean 0
## sd 0
##
## $pers_a
## estimate std.error statistic p.value conf.low conf.high
## mean 0.20456071 0.0195666193 10.433477 1.450827e-09 0.16619114 0.24293027
## sd 0.05862264 0.0002714668 2.928455 1.173231e-08 0.05832843 0.05892019
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.21065181 0.20833117 0.88918037 93.99839 4.111013e-56
## sd 0.05703859 0.05688833 0.03205223 29.92537 1.256598e-54
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3073.75974 6165.5195 6217.4401 1866.8085 2358.000000
## sd 1.732262 86.01007 170.1551 165.0649 134.8963 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 1 1 0 1
## sd 0 0 0 0 0
## significant_negative
## mean 0
## sd 0
##
## $pers_n
## estimate std.error statistic p.value conf.low conf.high
## mean -0.09751528 0.0201788237 -4.781985 0.06488922 -0.13708536 -0.05794521
## sd 0.04905087 0.0007283533 2.339181 0.18515988 0.04998104 0.04814510
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.18025226 0.17784851 0.90582179 76.95645 5.561893e-13
## sd 0.07348383 0.07335924 0.04048169 34.39706 2.643324e-11
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3116.8080 6251.6161 6303.537 1938.7034 2358.000000
## sd 1.732262 106.3505 210.8025 205.555 173.7892 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.8454590 0.01293945 0.9870605 0
## sd 0 0.3615107 0.11302718 0.1130272 0
## significant_negative
## mean 0.8454590
## sd 0.3615107
plot_all_curves(lm_deaths_0930, 'lm_deaths_0930')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
lm_deaths_0930_ctrl <- lm(death_rate_0930 ~ airport_dist +
conservative + male + age + popdens + manufact +
tourism + academics + medinc + healthcare +
temp + death_rate_0930_lag,
data = df_us_death_scaled)
summary(lm_deaths_0930_ctrl)
##
## Call:
## lm(formula = death_rate_0930 ~ airport_dist + conservative +
## male + age + popdens + manufact + tourism + academics + medinc +
## healthcare + temp + death_rate_0930_lag, data = df_us_death_scaled)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9380 -0.5061 -0.1812 0.2849 5.7208
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.001376 0.017228 -0.080 0.93636
## airport_dist 0.094936 0.020382 4.658 3.37e-06 ***
## conservative -0.326855 0.021335 -15.320 < 2e-16 ***
## male -0.078391 0.019013 -4.123 3.87e-05 ***
## age -0.056769 0.018559 -3.059 0.00225 **
## popdens 0.087295 0.019869 4.394 1.16e-05 ***
## manufact 0.036678 0.021292 1.723 0.08509 .
## tourism -0.068205 0.020024 -3.406 0.00067 ***
## academics -0.248127 0.036920 -6.721 2.26e-11 ***
## medinc 0.068018 0.029185 2.331 0.01986 *
## healthcare 0.039224 0.022986 1.706 0.08805 .
## temp 0.453554 0.023106 19.630 < 2e-16 ***
## death_rate_0930_lag 0.171593 0.031177 5.504 4.12e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8379 on 2353 degrees of freedom
## Multiple R-squared: 0.3015, Adjusted R-squared: 0.2979
## F-statistic: 84.64 on 12 and 2353 DF, p-value: < 2.2e-16
cox_onset_socdist <- spec_calculate(df = df_us_socdist_scaled,
y = "Surv(cpt_day_socdist_2, event)",
model = "cox_model",
controls = covariates %>%
append(c('cpt_day_socdist_2_lag', 'onset_prev', 'slope_prev')),
all.comb = T)
cox_onset_socdist <- cox_onset_socdist %>% map(filter_socdist)
cox_onset_socdist_hr <- cox_onset_socdist %>% map(coef_to_hr)
calc_all_summaries(cox_onset_socdist_hr)
## $pers_o
## estimate std.error statistic p.value conf.low conf.high fit_n
## mean 0.90084718 0.0251620611 -4.186946 0.001514851 0.85747908 0.94641117 2366
## sd 0.02327209 0.0008332922 1.134641 0.004378419 0.02144995 0.02529138 0
## fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean 2365 234.86781 5.412147e-22 224.69959
## sd 0 66.65304 5.712933e-21 65.57137
## fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean 1.657834e-20 223.08702 3.23802e-20 NA
## sd 1.946635e-19 64.68873 4.20639e-19 NA
## fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean NA 0.09414009 0.9999987 0.63791890
## sd NA 0.02557470 0.0000000 0.01724889
## fit_std.error.concordance fit_logLik fit_AIC fit_BIC fit_nobs
## mean 0.0071943487 -15894.97671 31807.95342 31859.87022 2366
## sd 0.0002517063 33.32652 64.80125 60.26247 0
## significant positive negative significant_positive significant_negative
## mean 0.99926758 0 1 0 0.99926758
## sd 0.02705668 0 0 0 0.02705668
##
## $pers_c
## estimate std.error statistic p.value conf.low conf.high fit_n
## mean 0.9305632 0.0212930328 -3.439328 0.02976033 0.8924980 0.97025352 2366
## sd 0.0271874 0.0006426558 1.464538 0.04948005 0.0250966 0.02942282 0
## fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean 2365 230.04233 9.389246e-18 220.31793
## sd 0 65.03569 8.934878e-17 63.96218
## fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean 9.055267e-17 218.1110 1.179495e-16 NA
## sd 8.321553e-16 62.7894 1.056573e-15 NA
## fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean NA 0.09230748 0.9999987 0.63684492
## sd NA 0.02505988 0.0000000 0.01686053
## fit_std.error.concordance fit_logLik fit_AIC fit_BIC fit_nobs
## mean 0.0072149160 -15897.38945 31812.77891 31864.69571 2366
## sd 0.0002147792 32.51784 63.01764 57.96853 0
## significant positive negative significant_positive significant_negative
## mean 0.7814941 0 1 0 0.7814941
## sd 0.4132829 0 0 0 0.4132829
##
## $pers_e
## estimate std.error statistic p.value conf.low conf.high fit_n
## mean 1.02426578 0.0213783358 1.1179169 0.3310683 0.98223389 1.0680964 2366
## sd 0.01204374 0.0001072928 0.5488602 0.2332412 0.01152189 0.0125931 0
## fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean 2365 217.59045 4.479032e-13 207.16637
## sd 0 74.34435 3.637662e-12 73.62063
## fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean 4.870524e-12 205.28108 6.898468e-12 NA
## sd 3.612500e-11 72.55058 4.849513e-11 NA
## fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean NA 0.08741196 0.9999987 0.63445954
## sd NA 0.02878782 0.0000000 0.01893728
## fit_std.error.concordance fit_logLik fit_AIC fit_BIC fit_nobs
## mean 0.0072780187 -15903.61539 31825.23079 31877.14759 2366
## sd 0.0002501295 37.17218 72.42525 67.58321 0
## significant positive negative significant_positive significant_negative
## mean 0.03979492 1 0 0.03979492 0
## sd 0.19550094 0 0 0.19550094 0
##
## $pers_a
## estimate std.error statistic p.value conf.low conf.high fit_n
## mean 1.00925711 0.0215170876 0.3206578 0.1308660 0.96752121 1.05279642 2366
## sd 0.04240161 0.0008720833 1.9443295 0.1687364 0.03927003 0.04575063 0
## fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean 2365 219.91522 3.098970e-13 209.08456
## sd 0 74.34997 2.452981e-12 72.81736
## fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean 3.048073e-12 207.31414 4.251963e-12 NA
## sd 2.230823e-11 71.92873 2.986865e-11 NA
## fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean NA 0.08830803 0.9999987 0.63489406
## sd NA 0.02877147 0.0000000 0.01796452
## fit_std.error.concordance fit_logLik fit_AIC fit_BIC fit_nobs
## mean 0.0072595031 -15902.45301 31822.90601 31874.82281 2366
## sd 0.0002430652 37.17499 72.45016 67.66904 0
## significant positive negative significant_positive significant_negative
## mean 0.4101562 0.5031738 0.4968262 0.2971191 0.1130371
## sd 0.4919219 0.5000510 0.5000510 0.4570452 0.3166768
##
## $pers_n
## estimate std.error statistic p.value conf.low conf.high fit_n
## mean 1.07129629 0.0227821273 3.028251 0.05228809 1.02453397 1.12019435 2366
## sd 0.02975165 0.0005711819 1.264375 0.10005770 0.02922361 0.03029421 0
## fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean 2365 226.77298 2.340417e-18 216.21052
## sd 0 66.03596 2.269295e-17 64.65633
## fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean 1.974366e-17 213.98344 2.566388e-17 NA
## sd 1.664079e-16 63.52385 2.065698e-16 NA
## fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean NA 0.09104196 0.9999987 0.63592775
## sd NA 0.02545169 0.0000000 0.01772905
## fit_std.error.concordance fit_logLik fit_AIC fit_BIC fit_nobs
## mean 0.0072763743 -15899.02413 31816.04825 31867.96505 2366
## sd 0.0002555284 33.01798 64.09478 59.27922 0
## significant positive negative significant_positive significant_negative
## mean 0.7404785 1 0 0.7404785 0
## sd 0.4384256 0 0 0.4384256 0
plot_all_curves(cox_onset_socdist_hr, 'cox_onset_socdist_hr', hr = T)
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
cox_onset_socdist_ctrl <- coxph(Surv(cpt_day_socdist_2, event) ~ airport_dist +
conservative + male + age + popdens + manufact +
tourism + academics + medinc + healthcare +
temp + onset_prev + slope_prev +
cpt_day_socdist_2_lag,
data = df_us_socdist_scaled)
summary(cox_onset_socdist_ctrl)
## Call:
## coxph(formula = Surv(cpt_day_socdist_2, event) ~ airport_dist +
## conservative + male + age + popdens + manufact + tourism +
## academics + medinc + healthcare + temp + onset_prev + slope_prev +
## cpt_day_socdist_2_lag, data = df_us_socdist_scaled)
##
## n= 2366, number of events= 2365
##
## coef exp(coef) se(coef) z Pr(>|z|)
## airport_dist -0.12768 0.88013 0.02573 -4.962 6.98e-07 ***
## conservative 0.09538 1.10007 0.02666 3.577 0.000347 ***
## male -0.05323 0.94816 0.02335 -2.279 0.022655 *
## age -0.01756 0.98259 0.02205 -0.797 0.425615
## popdens -0.07439 0.92831 0.02794 -2.663 0.007750 **
## manufact 0.04968 1.05093 0.02457 2.022 0.043180 *
## tourism -0.11767 0.88899 0.02528 -4.655 3.24e-06 ***
## academics 0.02042 1.02063 0.04352 0.469 0.639026
## medinc -0.07733 0.92558 0.03477 -2.224 0.026136 *
## healthcare 0.03931 1.04009 0.02802 1.403 0.160717
## temp -0.29100 0.74752 0.02704 -10.761 < 2e-16 ***
## onset_prev -0.03523 0.96539 0.03418 -1.031 0.302745
## slope_prev -0.11930 0.88754 0.03333 -3.580 0.000344 ***
## cpt_day_socdist_2_lag -0.23676 0.78918 0.04751 -4.983 6.25e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## airport_dist 0.8801 1.1362 0.8368 0.9257
## conservative 1.1001 0.9090 1.0441 1.1591
## male 0.9482 1.0547 0.9057 0.9926
## age 0.9826 1.0177 0.9410 1.0260
## popdens 0.9283 1.0772 0.8789 0.9806
## manufact 1.0509 0.9515 1.0015 1.1028
## tourism 0.8890 1.1249 0.8460 0.9341
## academics 1.0206 0.9798 0.9372 1.1115
## medinc 0.9256 1.0804 0.8646 0.9909
## healthcare 1.0401 0.9615 0.9845 1.0988
## temp 0.7475 1.3378 0.7089 0.7882
## onset_prev 0.9654 1.0359 0.9028 1.0323
## slope_prev 0.8875 1.1267 0.8314 0.9474
## cpt_day_socdist_2_lag 0.7892 1.2671 0.7190 0.8662
##
## Concordance= 0.661 (se = 0.007 )
## Likelihood ratio test= 344.5 on 14 df, p=<2e-16
## Wald test = 320.8 on 14 df, p=<2e-16
## Score (logrank) test = 325.7 on 14 df, p=<2e-16
lm_mean_socdist <- spec_calculate(df = df_us_socdist_scaled,
y = "mean_diff_socdist_2",
model = "lm",
controls = covariates %>%
append(c('mean_diff_socdist_2_lag', 'onset_prev', 'slope_prev')),
all.comb = T)
lm_mean_socdist <- lm_mean_socdist %>% map(filter_socdist)
calc_all_summaries(lm_mean_socdist)
## $pers_o
## estimate std.error statistic p.value conf.low conf.high
## mean 0.14021047 0.0194929117 7.205979 8.705399e-07 0.10198542 0.17843551
## sd 0.03452777 0.0006951486 1.812431 7.476719e-06 0.03461717 0.03449206
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.35328573 0.35083507 0.80530040 146.96585 1.503377e-124
## sd 0.04184288 0.04168147 0.02561953 24.32935 4.585680e-123
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 9.000000 -2838.6814 5699.363 5762.8213 1529.47925 2356.000000
## sd 1.732262 75.1761 148.104 141.8935 98.95842 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 1 1 0 1
## sd 0 0 0 0 0
## significant_negative
## mean 0
## sd 0
##
## $pers_c
## estimate std.error statistic p.value conf.low conf.high
## mean -0.09138612 0.0175028708 -5.202841 0.0001219264 -0.12570875 -0.05706349
## sd 0.02382600 0.0005762093 1.266658 0.0005332005 0.02445777 0.02323204
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.3459287 0.34345301 0.80980432 141.97404 1.794927e-99
## sd 0.0455359 0.04536277 0.02764305 22.82046 5.855583e-98
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 9.000000 -2851.70314 5725.4063 5788.8648 1546.8787 2356.000000
## sd 1.732262 80.44351 158.4916 151.8134 107.6924 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 1 0 1 0
## sd 0 0 0 0 0
## significant_negative
## mean 1
## sd 0
##
## $pers_e
## estimate std.error statistic p.value conf.low conf.high
## mean -0.005150281 0.0170325770 -0.3254585 0.3943169 -0.03855068 0.02825012
## sd 0.018590856 0.0005492686 1.0726356 0.2595924 0.01788979 0.01932655
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.33818294 0.33567872 0.81453789 137.17003 1.467243e-86
## sd 0.04833435 0.04817127 0.02914621 23.33049 5.538707e-85
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 9.000000 -2865.36119 5752.7224 5816.1809 1565.1974 2356.000000
## sd 1.732262 84.18746 166.0151 159.4343 114.3107 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.04589844 0.2880859 0.7119141 0.03637695
## sd 0 0.20929038 0.4529266 0.4529266 0.18724911
## significant_negative
## mean 0.009521484
## sd 0.097124295
##
## $pers_a
## estimate std.error statistic p.value conf.low conf.high
## mean -0.07066438 0.0179947707 -3.905691 0.02573413 -0.10595161 -0.03537715
## sd 0.03237484 0.0005958765 1.737321 0.07033431 0.03290196 0.03188185
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.34307315 0.34058648 0.81157397 140.18752 5.652261e-98
## sd 0.04555493 0.04538166 0.02759323 22.68894 2.193977e-96
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 9.000000 -2856.87873 5735.7575 5799.2160 1553.6320 2356.000000
## sd 1.732262 80.11993 157.8544 151.2078 107.7374 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.8581543 0 1 0
## sd 0 0.3489344 0 0 0
## significant_negative
## mean 0.8581543
## sd 0.3489344
##
## $pers_n
## estimate std.error statistic p.value conf.low conf.high
## mean 0.05788521 0.0182294883 3.197587 0.0937796 0.02213770 0.09363271
## sd 0.03644144 0.0005360911 2.007429 0.2089676 0.03681758 0.03609200
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.3416623 0.33917212 0.81234918 139.36614 2.498468e-86
## sd 0.0500918 0.04993868 0.03028064 24.26272 9.696779e-85
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 9.000000 -2858.86918 5739.7384 5803.1969 1556.9687 2356.000000
## sd 1.732262 87.63406 172.9206 166.3678 118.4671 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.7509766 0.8999023 0.1000977 0.7460938
## sd 0 0.4325002 0.3001668 0.3001668 0.4352977
## significant_negative
## mean 0.004882812
## sd 0.069714828
plot_all_curves(lm_mean_socdist, 'lm_mean_socdist')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
lm_mean_socdist_ctrl <- lm(mean_diff_socdist_2 ~ airport_dist +
conservative + male + age + popdens + manufact +
tourism + academics + medinc + healthcare +
temp + onset_prev + slope_prev +
mean_diff_socdist_2_lag,
data = df_us_socdist_scaled)
summary(lm_mean_socdist_ctrl)
##
## Call:
## lm(formula = mean_diff_socdist_2 ~ airport_dist + conservative +
## male + age + popdens + manufact + tourism + academics + medinc +
## healthcare + temp + onset_prev + slope_prev + mean_diff_socdist_2_lag,
## data = df_us_socdist_scaled)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.6309 -0.2614 0.0611 0.3676 2.6202
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0023280 0.0157422 0.148 0.88245
## airport_dist -0.0964326 0.0186435 -5.172 2.51e-07 ***
## conservative -0.1728838 0.0201691 -8.572 < 2e-16 ***
## male -0.0058333 0.0175253 -0.333 0.73928
## age 0.0833188 0.0171674 4.853 1.29e-06 ***
## popdens 0.1215938 0.0184869 6.577 5.88e-11 ***
## manufact -0.0236647 0.0195175 -1.212 0.22545
## tourism 0.0744721 0.0184003 4.047 5.35e-05 ***
## academics 0.0745873 0.0341493 2.184 0.02905 *
## medinc 0.2866240 0.0272018 10.537 < 2e-16 ***
## healthcare 0.0008116 0.0210337 0.039 0.96922
## temp 0.0572502 0.0208304 2.748 0.00603 **
## onset_prev -0.0587622 0.0252012 -2.332 0.01980 *
## slope_prev 0.0714963 0.0237801 3.007 0.00267 **
## mean_diff_socdist_2_lag 0.3108043 0.0273882 11.348 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7657 on 2351 degrees of freedom
## Multiple R-squared: 0.4172, Adjusted R-squared: 0.4138
## F-statistic: 120.2 on 14 and 2351 DF, p-value: < 2.2e-16
results_us <- list(cox_onset_prev_hr, lm_slope_prev, lm_slope_prev_var,
lm_cases_0415, lm_cases_0930, lm_deaths_0415, lm_deaths_0930,
cox_onset_socdist_hr, lm_mean_socdist)
names(results_us) <- list('cox_onset_prev_hr', 'lm_slope_prev', 'lm_slope_prev_var',
'lm_cases_0415', 'lm_cases_0930', 'lm_deaths_0415', 'lm_deaths_0930',
'cox_onset_socdist_hr', 'lm_mean_socdist')
save(results_us, file="US_spec_results.RData")
death_temp <- df_us_death_unscaled %>% select(county_fips:death_rate_0930)
onset_temp <- df_us_prev_unscaled %>% select(county_fips, onset_prev, slope_prev)
df_us_desc <- df_us_socdist_unscaled %>%
plyr::join(death_temp, by="county_fips") %>%
dplyr::select(pers_o, pers_c, pers_e, pers_a, pers_n,
age, male, conservative,
academics, medinc, manufact,
airport_dist, tourism, healthcare, popdens,
temp, onset_prev, slope_prev,
case_rate_0415, case_rate_0930, death_rate_0415, death_rate_0930,
cpt_day_socdist_2, mean_diff_socdist_2)
df_us_desc %>% select(age) %>% summary()
## age
## Min. :22.90
## 1st Qu.:37.70
## Median :40.70
## Mean :40.59
## 3rd Qu.:43.40
## Max. :67.00
us_means <- df_us_desc %>% summarise_all(mean)
us_sd <- df_us_desc %>% summarise_all(sd)
desc_us <- rbind(us_means, us_sd) %>% t() %>% round(3) %>% as.data.frame()
names(desc_us) <- c('mean', 'sd')
desc_us %>% rownames_to_column() %>% write_csv('us_descriptives.csv')
desc_us
a <- df_us_socdist_unscaled %>% select(county_fips, pers_o:pers_n, cpt_day_socdist, mean_diff_socdist)
b <- df_us_prev_unscaled %>% select(county_fips, onset_prev, slope_prev)
c <- df_us_death_unscaled %>% select(county_fips, case_rate_0415:death_rate_0930)
us_joined <- plyr::join_all(list(a, b, c), by='county_fips')
us_joined %>% select(-county_fips) %>% cor(use = 'pairwise.complete')
## pers_o pers_a pers_e pers_c pers_n
## pers_o 1.00000000 -0.15281714 -0.08497946 -0.04802258 -0.22873070
## pers_a -0.15281714 1.00000000 0.24480399 0.65337156 -0.39348271
## pers_e -0.08497946 0.24480399 1.00000000 0.15985220 -0.39790623
## pers_c -0.04802258 0.65337156 0.15985220 1.00000000 -0.40801964
## pers_n -0.22873070 -0.39348271 -0.39790623 -0.40801964 1.00000000
## cpt_day_socdist 0.13992321 0.08453292 0.02403108 0.09085251 -0.12512306
## mean_diff_socdist 0.33391055 -0.23412700 -0.01723667 -0.21269607 0.06173468
## onset_prev -0.29216003 -0.09682226 -0.07478377 -0.09611230 0.26005706
## slope_prev 0.19638054 0.12336567 0.06290779 0.09638924 -0.18203280
## case_rate_0415 0.14548085 0.07765531 0.05022219 0.04619685 -0.12545916
## death_rate_0415 0.08588663 0.09385457 0.02045067 0.05738798 -0.06854497
## case_rate_0930 -0.09448026 0.32267793 0.07255061 0.23696948 -0.20943857
## death_rate_0930 -0.01762812 0.31188291 0.02183955 0.22741935 -0.12688753
## cpt_day_socdist mean_diff_socdist onset_prev slope_prev
## pers_o 0.13992321 0.33391055 -0.29216003 0.19638054
## pers_a 0.08453292 -0.23412700 -0.09682226 0.12336567
## pers_e 0.02403108 -0.01723667 -0.07478377 0.06290779
## pers_c 0.09085251 -0.21269607 -0.09611230 0.09638924
## pers_n -0.12512306 0.06173468 0.26005706 -0.18203280
## cpt_day_socdist 1.00000000 0.15266953 -0.14248381 0.19833550
## mean_diff_socdist 0.15266953 1.00000000 -0.26415088 0.27105720
## onset_prev -0.14248381 -0.26415088 1.00000000 -0.72372322
## slope_prev 0.19833550 0.27105720 -0.72372322 1.00000000
## case_rate_0415 0.15305730 0.24381339 -0.47200894 0.86665276
## death_rate_0415 0.12568626 0.19568034 -0.39929052 0.75658883
## case_rate_0930 0.14915336 -0.40302394 -0.14180907 0.22824887
## death_rate_0930 0.20710211 -0.09734626 -0.27900660 0.51420879
## case_rate_0415 death_rate_0415 case_rate_0930 death_rate_0930
## pers_o 0.14548085 0.08588663 -0.09448026 -0.01762812
## pers_a 0.07765531 0.09385457 0.32267793 0.31188291
## pers_e 0.05022219 0.02045067 0.07255061 0.02183955
## pers_c 0.04619685 0.05738798 0.23696948 0.22741935
## pers_n -0.12545916 -0.06854497 -0.20943857 -0.12688753
## cpt_day_socdist 0.15305730 0.12568626 0.14915336 0.20710211
## mean_diff_socdist 0.24381339 0.19568034 -0.40302394 -0.09734626
## onset_prev -0.47200894 -0.39929052 -0.14180907 -0.27900660
## slope_prev 0.86665276 0.75658883 0.22824887 0.51420879
## case_rate_0415 1.00000000 0.80803632 0.19781085 0.46226490
## death_rate_0415 0.80803632 1.00000000 0.14786594 0.48273071
## case_rate_0930 0.19781085 0.14786594 1.00000000 0.59222976
## death_rate_0930 0.46226490 0.48273071 0.59222976 1.00000000
us_means <- us_joined %>% select(-county_fips) %>% summarise_all(mean)
us_sd <- us_joined %>% select(-county_fips) %>% summarise_all(sd)
desc_us <- rbind(us_means, us_sd) %>% t() %>% round(3) %>% as.data.frame() %>%
rename(mean=V1, sd=V2)
desc_us %>% rownames_to_column() %>% write_csv('us_descriptives.csv')
desc_us
# get county names
counties <- read_csv('US_personality_2.csv') %>%
select(county, county_name) %>%
dplyr::rename(county_fips = county) %>%
mutate(county_fips = as.character(county_fips)) %>%
distinct()
df_us_prev <- read_csv('US_prevalence.csv')
df_us_prev <- df_us_prev %>%
select(fips, date, rate) %>%
mutate(date = as.Date(date, "%d%b%Y")) %>%
dplyr::rename(county_fips = fips,
rate_day = rate) %>%
mutate(county_fips = as.character(county_fips)) %>%
filter(rate_day >= 0 & rate_day <= 1000)
# merge with processed data frames
df_us_prev <- df_us_prev %>% inner_join(counties, by = 'county_fips')
df_us_socdist <- df_us_socdist %>% inner_join(counties, by = 'county_fips')
gg_prev <- df_us_prev %>%
filter(county_fips == '8037' | county_fips == '19079') %>%
filter(date >= '2020-03-1' & date <= '2020-05-01') %>%
mutate(window = ifelse(date >= '2020-03-15' & date <= '2020-04-15',
'in window', 'out of window')) %>%
ggplot(aes(x=date, y=rate_day)) +
geom_point(aes(col=county_name)) +
geom_line(aes(col=county_name)) +
theme_light() +
theme(legend.position="bottom", legend.margin=margin(t=-20)) +
guides(color=guide_legend(title="")) +
annotate('rect', xmin = as.Date('2020-03-15'),
xmax = as.Date('2020-04-15'),
ymin = -Inf, ymax = Inf, alpha = 0.07) +
geom_segment(aes(x = as.Date('2020-04-07'), y = 0,
xend = as.Date('2020-04-07'), yend = 1),
colour = "#00BFC4") +
geom_segment(aes(x = as.Date('2020-03-09'), y = 0,
xend = as.Date('2020-03-09'), yend = 1),
colour = "#F8766D") +
annotate("text", x = as.Date('2020-04-07'),
y = 2, label = "Onset \n Hamilton County", color = "#00BFC4", size = 3) +
annotate("text", x = as.Date('2020-03-09'),
y = 2, label = "Onset \n Eagle County", color = "#F8766D", size = 3) +
annotate("text", x = as.Date('2020-03-31'),
y = 9.5, label = "Time window used for\nanalysis of growth rates",
color = "grey50", size = 3) +
ylab('Prevalence') +
xlab('') +
ylim(c(-1,10)) +
ggtitle("COVID-19 - comparison of onsets and \n growth rates in two counties") +
theme(plot.title = element_text(size=11)) +
theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank())
gg_prev
gg_socdist <- df_us_socdist %>%
filter(county_fips == 19137 | county_fips == 36081) %>%
filter(date >= '2020-03-1' & date <= '2020-05-01') %>%
ggplot(aes(x=date, y=socdist_tiles)) +
#facet_wrap(~countyname) +
geom_point(aes(col=county_name)) +
geom_line(aes(col=county_name)) +
theme_light() +
theme(legend.position="bottom",legend.margin=margin(t=-20)) +
guides(color=guide_legend(title="")) +
geom_segment(aes(x = as.Date('2020-03-23'), y = -.6,
xend = as.Date('2020-03-23'), yend = 0),
colour = "#00BFC4") +
geom_segment(aes(x = as.Date('2020-03-23'), y = -.6,
xend = as.Date('2020-04-28'), yend = -.6),
colour = "#00BFC4") +
geom_segment(aes(x = as.Date('2020-03-14'), y = -.156,
xend = as.Date('2020-03-14'), yend = .17),
colour = "#F8766D") +
geom_segment(aes(x = as.Date('2020-03-14'), y = -.156,
xend = as.Date('2020-04-28'), yend = -.156),
colour = "#F8766D") +
annotate("text", x = as.Date('2020-03-26'),
y = .08, label = "Change point\nQueens County", color = "#00BFC4", size = 3) +
annotate("text", x = as.Date('2020-03-16'),
y = .25, label = "Change point\nMontgommery County", color = "#F8766D", size = 3) +
ylab('Social distancing') +
xlab("") +
ylim(c(-.7,.3)) +
ggtitle("Social distancing - comparison of change \n points and means in two counties") +
theme(plot.title = element_text(size=11)) +
theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank())
gg_socdist
figure <- ggarrange(gg_prev, gg_socdist,
labels = c("A", "B"),
ncol = 2, nrow = 1)
figure
# create sequence of dates
date_sequence <- seq.Date(min(df_us_prev$date),
max(df_us_prev$date), 1)
# create data frame with time sequence
df_dates = data.frame(date_sequence, 1:length(date_sequence))
names(df_dates) <- c('date', 'time')
tto <- df_us_prev_unscaled %>%
merge(df_dates, by.x = 'onset_prev', by.y = 'time') %>%
filter(event==1) %>%
ggplot(aes(x=date+1)) +
geom_histogram(bins=15) +
theme_light() +
ggtitle('Distribution - COVID-19 Onset') +
theme(plot.title = element_text(size=11)) +
ylab('Absolute frequency') +
xlab('Date of March')
soa <- df_us_slope_prev %>%
ggplot(aes(x=slope_prev)) +
geom_histogram(bins=15) +
theme_light() +
ggtitle('Distribution - COVID-19 Growth Rates') +
theme(plot.title = element_text(size=11)) +
ylab('Absolute frequency') +
xlab('Standardized growth rates')
figure <- ggarrange(tto, soa,
labels = c("", ""),
ncol = 2, nrow = 1)
figure
tto <- df_us_death_unscaled %>%
ggplot(aes(x=case_rate_0930)) +
geom_histogram(bins=15) +
theme_light() +
ggtitle('Distribution - Cumulative Case Rates') +
theme(plot.title = element_text(size=11)) +
ylab('Absolute frequency') +
xlab('Cases Per 1000 Inhabitants')
soa <- df_us_death_unscaled %>%
ggplot(aes(x=death_rate_0930)) +
geom_histogram(bins=15) +
theme_light() +
ggtitle('Distribution - Cumulative Death Rates') +
theme(plot.title = element_text(size=11)) +
ylab('Absolute frequency') +
xlab('Deaths Per 1000 Inhabitants')
figure <- ggarrange(tto, soa,
labels = c("", ""),
ncol = 2, nrow = 1)
figure
# create sequence of dates
date_sequence <- seq.Date(min(df_us_socdist$date),
max(df_us_socdist$date), 1)
# create data frame with time sequence
df_dates = data.frame(date_sequence, 1:length(date_sequence))
names(df_dates) <- c('date', 'time')
tto <- df_us_cpt_socdist %>%
merge(df_dates, by.x = 'cpt_day_socdist_2', by.y = 'time') %>%
filter(cpt_day_socdist_2 < 30) %>%
ggplot(aes(x=date+1)) +
geom_histogram(bins=15) +
theme_light() +
ggtitle('Distribution - Time to Adoption') +
theme(plot.title = element_text(size=11)) +
ylab('Absolute frequency') +
xlab('Date') +
xlim(c(as.Date('2020-03-01'), as.Date('2020-03-31')))
soa <- df_us_cpt_socdist %>%
ggplot(aes(x=mean_diff_socdist_2)) +
geom_histogram(bins=15) +
theme_light() +
ggtitle('Distribution - Strength of Adjustment') +
theme(plot.title = element_text(size=11)) +
ylab('Absolute frequency') +
xlab('Standardized mean difference')
figure <- ggarrange(tto, soa,
labels = c("", ""),
ncol = 2, nrow = 1)
figure
df_us_google <- read_csv("../Google/2020_US_Region_Mobility_Report.csv")
df_us_fb <- df_us_socdist %>%
select(county_fips, date, socdist_tiles, socdist_single_tile)
df_us_ggl <- df_us_google %>%
select(census_fips_code, date,
retail_and_recreation_percent_change_from_baseline:residential_percent_change_from_baseline) %>%
drop_na() %>%
mutate(county_fips = as.character(as.numeric(census_fips_code))) %>%
select(-census_fips_code)
df_us_fb_ggl <- df_us_fb %>% plyr::join(df_us_ggl, c('county_fips', 'date')) %>% drop_na()
df_us_fb_ggl %>% split(.$county_fips) %>%
map(~ map_if(., is.numeric, scale)) %>%
bind_rows() %>%
select(socdist_tiles:residential_percent_change_from_baseline) %>%
drop_na() %>%
cor() %>%
as.data.frame() %>%
select(c(1,2)) %>% round(3) %>% write.csv()
## "","socdist_tiles","socdist_single_tile"
## "socdist_tiles",1,-0.971
## "socdist_single_tile",-0.971,1
## "retail_and_recreation_percent_change_from_baseline",0.945,-0.939
## "grocery_and_pharmacy_percent_change_from_baseline",0.766,-0.795
## "parks_percent_change_from_baseline",0.44,-0.442
## "transit_stations_percent_change_from_baseline",0.889,-0.888
## "workplaces_percent_change_from_baseline",0.919,-0.925
## "residential_percent_change_from_baseline",-0.906,0.897
load("US_spec_results.RData")
load("../GER/GER_spec_results.RData")
plot_dist <- function(df, filename){
w <- 5
h <- 5
file_path_eps <- paste0("../../Plots/COMP/", filename,".eps")
file_path_pdf <- paste0("../../Plots/COMP/", filename,".pdf")
file_path_png <- paste0("../../Plots/COMP/", filename,".png")
ggplot(df, aes(x=estimate, fill=country), alpha=.3) +
geom_histogram(alpha=.5, position="identity") +
#coord_fixed(ratio = 0.1) +
theme_bw() +
theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black")) +
theme(axis.title.x=element_blank(),
axis.text.y=element_blank(),
axis.title.y=element_blank()) +
theme(legend.position="none") +
ggsave(file=file_path_eps, device = 'eps', width=w, height=h)+
ggsave(file=file_path_pdf, device = 'pdf', width=w, height=h)+
ggsave(file=file_path_png, device = 'png', width=w, height=h)
}
# define function to plot all curves in list
plot_all_dist <- function(ls, analysis, hr=F){
pers <- c('o', 'c', 'e', 'a', 'n')
filenames <- as.list(paste0(analysis, '_', pers))
pmap(list(ls, filenames), plot_dist)
}
join_results <- function(list_us, list_ger){
US = "US"
GER = "GER"
list_us <- list_us %>%
map(~cbind(.x, US) %>% rename(country=US))
list_ger <- list_ger %>%
map(~cbind(.x, GER) %>% rename(country=GER))
map2(list_us, list_ger, rbind)
}
join_results(results_us$cox_onset_prev_hr, results_ger$cox_onset_prev_hr) %>%
plot_all_dist(analysis = 'cox_onset_prev_hr')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
join_results(results_us$lm_slope_prev, results_ger$lm_slope_prev) %>%
plot_all_dist(analysis = 'lm_slope_prev')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
join_results(results_us$lm_slope_prev_var, results_ger$lm_slope_prev_var) %>%
plot_all_dist(analysis = 'lm_slope_prev_var')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
join_results(results_us$lm_cases_0415, results_ger$lm_cases_0331) %>%
plot_all_dist(analysis = 'lm_cases_0415')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
join_results(results_us$lm_cases_0930, results_ger$lm_cases_0930) %>%
plot_all_dist(analysis = 'lm_cases_0930')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
join_results(results_us$lm_deaths_0415, results_ger$lm_deaths_0331) %>%
plot_all_dist(analysis = 'lm_deaths_0415')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
join_results(results_us$lm_deaths_0930, results_ger$lm_deaths_0930) %>%
plot_all_dist(analysis = 'lm_deaths_0930')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
join_results(results_us$cox_onset_socdist_hr, results_ger$cox_onset_socdist_hr) %>%
plot_all_dist(analysis = 'cox_onset_socdist_hr')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
join_results(results_us$lm_mean_socdist, results_ger$lm_mean_socdist) %>%
plot_all_dist(analysis = 'lm_mean_socdist')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
calculate_effsize <- function(df){
df %>% summarise(d = effsize::cohen.d(estimate ~ country)$estimate,
pval = t.test(estimate ~ country, var.equal = TRUE)$p.value)
}
all_ttest <- map2(results_us, results_ger, join_results) %>% flatten() %>%
map(calculate_effsize) %>% bind_rows(.id = "model")
models <- rep(names(results_us), each=5)
all_ttest <- cbind(models, all_ttest)
all_ttest
save(all_ttest, file='ttest_results.RData')
df_us_prev <- read_csv('US_prevalence.csv')
df_us_prev <- df_us_prev %>%
select(fips, date, rate) %>%
mutate(date = as.Date(date, "%d%b%Y")) %>%
dplyr::rename(county_fips = fips,
rate_day = rate) %>%
mutate(county_fips = as.character(county_fips)) %>%
filter(rate_day >= 0 & rate_day <= 1000) %>%
filter(date <= '2020-04-15')
df_us_prev %>% write_csv('us_prev_fips.csv')
df_us_death <- read_csv('US_prevalence.csv')
df_us_death <-df_us_death %>%
mutate(date = as.Date(date, "%d%b%Y"),
fips = as.character(fips))
df_us_death_0930 <- df_us_death %>%
filter(date == '2020-09-30') %>%
dplyr::rename(county_fips = fips,
case_rate_0930 = rate,
death_rate_0930 = rate_death) %>%
select(county_fips, case_rate_0930, death_rate_0930)
df_us_death_0415 <- df_us_death %>%
filter(date == '2020-04-15') %>%
dplyr::rename(county_fips = fips,
case_rate_0415 = rate,
death_rate_0415 = rate_death) %>%
select(county_fips, case_rate_0415, death_rate_0415)
df_us_death <- merge(df_us_death_0415, df_us_death_0930)
# save df
df_us_death %>% write_csv('us_death_county_maps.csv')
df_us_pers <- read_csv('US_personality_2.csv')
df_us_pers %>% filter(freq >= 50) %>% .$freq %>% sd()
## [1] 3175.839
df_us_pers <- df_us_pers %>%
dplyr::rename(county_fips = county,
pers_o = open,
pers_c = sci,
pers_e = extra,
pers_a = agree,
pers_n = neuro) %>%
filter(freq >= 50) %>%
select(-county_name, -freq) %>%
mutate(county_fips = as.character(county_fips))
df_us_pers %>% .$freq %>% mean
## [1] NA
df_us_ctrl <- read.csv('US_controls_2.csv')
df_us_ctrl <- df_us_ctrl %>% select(-county_name) %>%
rename(county_fips = county,
airport_dist = airport_distance,
conservative = republican,
age = medage,
healthcare = physician_pc) %>%
mutate(county_fips = as.character(county_fips),
male=male*100,
tourism=tourism*100,
manufact=manufact*100,
academics=academics*100)
df_us_temp <- read_csv("US_controls_weather.csv") %>%
filter(month %in% c('Mar')) %>%
group_by(fips) %>% summarise(temp = mean(tempc)) %>%
rename(county_fips = fips) %>%
mutate(county_fips = as.character(as.numeric(county_fips)))
df_us_ctrl <- df_us_ctrl %>% plyr::join(df_us_temp, 'county_fips')
# create sequence of dates
date_sequence <- seq.Date(min(df_us_prev$date),
max(df_us_prev$date), 1)
# create data frame with time sequence
df_dates = data.frame(date_sequence, 1:length(date_sequence))
names(df_dates) <- c('date', 'time')
# join data frames
df_us_prev <- df_us_prev %>%
plyr::join(df_us_ctrl, by='county_fips') %>%
plyr::join(df_us_pers, by='county_fips') %>%
merge(df_dates, by='date') %>%
arrange(county_fips, date) %>%
drop_na()
df_us_prev %>% select(-date, -rate_day, -time) %>%
distinct() %>% write_csv('df_us_pers_fips.csv')
df_us_prev %>% select(county_fips) %>% distinct() %>% nrow()
## [1] 2486
# join data frames
df_us_death <- df_us_death %>%
plyr::join(df_us_ctrl, by='county_fips') %>%
plyr::join(df_us_pers, by='county_fips') %>%
drop_na()
# save df
df_us_death %>%
#select(county_fips, case_rate, death_rate) %>%
write_csv('us_death_fips_controls.csv')
easter <- seq.Date(as.Date('2020-04-10'), as.Date('2020-04-13'), 1)
df_us_loess <- df_us_socdist %>%
mutate(weekday = format(date, '%u')) %>%
filter(!weekday %in% c('6','7') | date %in% easter) %>%
split(.$county_fips) %>%
map(~ loess(socdist_single_tile ~ time, data = .)) %>%
map(predict, 1:max(df_us_socdist$time)) %>%
bind_rows() %>%
gather(key = 'county_fips', value = 'loess') %>%
group_by(county_fips) %>%
mutate(time = row_number())
df_us_loess_2 <- df_us_socdist %>%
mutate(weekday = format(date, '%u')) %>%
filter(!weekday %in% c('6','7') | date %in% easter) %>%
split(.$county_fips) %>%
map(~ loess(socdist_tiles ~ time, data = .)) %>%
map(predict, 1:max(df_us_socdist$time)) %>%
bind_rows() %>%
gather(key = 'county_fips', value = 'loess') %>%
rename(loess_2 = loess) %>%
group_by(county_fips) %>%
mutate(time = row_number())
df_us_socdist <- df_us_socdist %>%
merge(df_us_loess, by=c('county_fips', 'time')) %>%
merge(df_us_loess_2, by=c('county_fips', 'time')) %>%
mutate(weekday = format(date, '%u')) %>%
mutate(socdist_single_tile_clean = ifelse(weekday %in% c('6','7') | date %in% easter,
loess, socdist_single_tile),
socdist_tiles_clean = ifelse(weekday %in% c('6','7') | date %in% easter,
loess_2, socdist_tiles)) %>%
arrange(county_fips, time) %>%
select(-weekday)
df_us_socdist <- df_us_socdist %>% drop_na() %>% mutate(time = time-1)
# get onset day
df_us_onset_prev <- df_us_prev %>%
group_by(county_fips) %>%
filter(rate_day > 0.1) %>%
summarise(onset_prev = min(time))
# merge with county data
df_us_onset_prev <- df_us_prev %>%
select(-date, -time, -rate_day) %>%
distinct() %>%
mutate(county_fips = as.character(county_fips)) %>%
left_join(df_us_onset_prev, by = 'county_fips')
# handle censored data
df_us_onset_prev <- df_us_onset_prev %>%
mutate(event = ifelse(is.na(onset_prev), 0, 1)) %>%
mutate(onset_prev = replace_na(onset_prev, as.numeric(diff(range(df_us_prev$date)))+1))
# cut time series
df_us_prev <- df_us_prev %>%
filter(date > '2020-03-15' & date < '2020-04-15')
# extract slope prevalence
df_us_slope_prev <- df_us_prev %>%
split(.$county_fips) %>%
map(~ lm(log(rate_day+1) ~ time, data = .)) %>%
map(coef) %>%
map_dbl('time') %>%
as.data.frame() %>%
rownames_to_column('county_fips') %>%
rename(slope_prev = '.')
# merge with county data
df_us_slope_prev <- df_us_onset_prev %>%
inner_join(df_us_slope_prev, by = 'county_fips')
# get unscaled object for descriptives
df_us_prev_desc <- df_us_slope_prev
# cut time series before onset
df_us_slope_var <- df_us_prev %>%
filter(date <= '2020-05-15') %>%
group_by(county_fips) %>%
filter(rate_day > 0.1) %>%
mutate(time = time-min(time)+1) %>%
ungroup() %>%
filter(time <= 30)
# extract slope prevalence
df_us_slope_var <- df_us_slope_var %>%
split(.$county_fips) %>%
map(~ lm(log(rate_day+1) ~ time, data = .)) %>%
map(coef) %>%
map_dbl('time') %>%
as.data.frame() %>%
rownames_to_column('county_fips') %>%
rename(slope_prev_var = '.')
# merge with county data
df_us_slope_prev <- df_us_slope_prev %>%
left_join(df_us_slope_var, by = 'county_fips') %>%
mutate(slope_prev_var = replace_na(slope_prev_var, 0))
df_us_prev_unscaled <- inner_join(df_us_slope_prev, df_us_cpt_socdist %>% select(county_fips), by='county_fips')
df_us_death_unscaled <- inner_join(df_us_death, df_us_cpt_socdist %>% select(county_fips), by='county_fips')
df_us_socdist_unscaled <- inner_join(df_us_cpt_socdist, df_us_slope_prev %>% select(county_fips), by='county_fips')
fips_list <- df_us_prev_unscaled %>% select(county_fips)
df_us_prev_unscaled %>% merge(df_us_death_unscaled %>% select(county_fips:death_rate_0930)) %>%
merge(df_us_socdist_unscaled %>% select(county_fips, cpt_day_socdist_2, mean_diff_socdist_2)) %>%
select(-event) %>% write_csv('us_all_variables.csv')
df_us_prev_scaled <- df_us_prev_unscaled %>%
mutate_at(vars(-county_fips, -event), scale)
df_us_death_scaled <- df_us_death_unscaled %>%
mutate_at(vars(-county_fips), scale)
df_us_socdist_scaled <- df_us_socdist_unscaled %>%
mutate_at(vars(-county_fips, -event), scale)
# create UDF to calculate lagged variables
calc_lags <- function(df, weights, cols){
cols_only <- df %>% select(cols)
cols_lag <- weights %*% as.matrix(cols_only) %>%
as.matrix() %>% as.data.frame()
names(cols_lag) <- paste0(names(cols_lag), '_lag')
return(cols_lag)
}
# read_weight matrix
weight_mat_norm <- read_csv('df_us_spatial_weights_matrix.csv')
##
## -- Column specification --------------------------------------------------------------------
## cols(
## .default = col_double()
## )
## i Use `spec()` for the full column specifications.
weight_mat_norm <- weight_mat_norm %>% select(-county_fips) %>% as.matrix()
# generate spatially lagged y
y_only <- df_us_prev_scaled %>% select(onset_prev,slope_prev,slope_prev_var) %>% names()
y_lag <- calc_lags(df_us_prev_scaled, weight_mat_norm, y_only)
# generate spatially lagged X
X_only <- df_us_prev_scaled %>% select(airport_dist:pers_n) %>% names()
X_lag <- calc_lags(df_us_prev_scaled, weight_mat_norm, X_only)
# bind new variables to df
df_us_prev_scaled <- cbind(df_us_prev_scaled, y_lag, X_lag)
# generate spatially lagged y
y_only <- df_us_death_scaled %>% select(case_rate_0415:death_rate_0930) %>% names()
y_lag <- calc_lags(df_us_death_scaled, weight_mat_norm, y_only)
# generate spatially lagged X
X_only <- df_us_death_scaled %>% select(airport_dist:pers_n) %>% names()
X_lag <- calc_lags(df_us_death_scaled, weight_mat_norm, X_only)
# bind new variables to df
df_us_death_scaled <- cbind(df_us_death_scaled, y_lag, X_lag)
# generate spatially lagged y
y_only <- df_us_socdist_scaled %>% select(cpt_day_socdist_2,mean_diff_socdist_2) %>% names()
y_lag <- calc_lags(df_us_socdist_scaled, weight_mat_norm, y_only)
# generate spatially lagged X
X_only <- df_us_socdist_scaled %>% select(airport_dist:pers_n) %>% names()
X_lag <- calc_lags(df_us_socdist_scaled, weight_mat_norm, X_only)
# bind new variables to df
df_us_socdist_scaled <- cbind(df_us_socdist_scaled, y_lag, X_lag)
write_csv(df_us_prev_scaled, 'df_us_slope_prev.csv')
write_csv(df_us_death_scaled, 'df_us_death_scaled.csv')
write_csv(df_us_socdist_scaled, 'df_us_cpt_socdist.csv')
# define function to calculate specification curve analyses for all traits
spec_calculate <- function(df, y, model, controls, all.comb = T){
spec_results_o <- run_specs(df = df,
y = c(y), x = c("pers_o"),
model = c(model), controls = controls,
all.comb = all.comb)
spec_results_c <- run_specs(df = df,
y = c(y), x = c("pers_c"),
model = c(model), controls = controls,
all.comb = all.comb)
spec_results_e <- run_specs(df = df,
y = c(y), x = c("pers_e"),
model = c(model), controls = controls,
all.comb = all.comb)
spec_results_a <- run_specs(df = df,
y = c(y), x = c("pers_a"),
model = c(model), controls = controls,
all.comb = all.comb)
spec_results_n <- run_specs(df = df,
y = c(y), x = c("pers_n"),
model = c(model), controls = controls,
all.comb = all.comb)
spec_results <- list(spec_results_o, spec_results_c, spec_results_e,
spec_results_a, spec_results_n)
names(spec_results) <- list('spec_results_o', 'spec_results_c', 'spec_results_e',
'spec_results_a', 'spec_results_n')
return(spec_results)
}
# adapted from specr package code
format_results <- function(df, var, null = 0, desc = FALSE) {
# rank specs
if (isFALSE(desc)) {
df <- df %>%
dplyr::arrange(!! var)
} else {
df <- df %>%
dplyr::arrange(desc(!! var))
}
# create rank variable and color significance
df <- df %>%
dplyr::mutate(specifications = 1:nrow(df),
color = case_when(conf.low > null ~ "#377eb8",
conf.high < null ~ "#e41a1c",
TRUE ~ "darkgrey"))
return(df)
}
# define function to plot single specification curve
plot_curves <- function(results, filename, hr=F){
file_path_eps <- paste0("../../Plots/US/", filename,".eps")
file_path_pdf <- paste0("../../Plots/US/", filename,".pdf")
file_path_png <- paste0("../../Plots/US/", filename,".png")
if(hr==F){
results %>%
format_results(var = .$estimate, null = 0, desc = F) %>%
ggplot(aes(x = specifications,
y = estimate,
ymin = conf.low,
ymax = conf.high,
color = color)) +
geom_point(aes(color = color),
size = 1) +
theme_minimal() +
scale_color_identity() +
theme(strip.text = element_blank(),
axis.line = element_line("black", size = .5),
legend.position = "none",
panel.spacing = unit(.75, "lines"),
axis.text = element_text(colour = "black")) +
labs(x = "") +
geom_pointrange(alpha = 0.05,
size = .6,
fatten = 1) +
geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
labs(x = "", y = "standarized coefficient") +
coord_fixed(ratio = 2000, ylim = c(-0.4, 0.4)) +
theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black")) +
ggsave(file=file_path_eps, device = 'eps')+
ggsave(file=file_path_pdf, device = 'pdf')+
ggsave(file=file_path_png, device = 'png')
}else{
results %>%
format_results(var = .$estimate, null = 1, desc = F) %>%
ggplot(aes(x = specifications,
y = estimate,
ymin = conf.low,
ymax = conf.high,
color = color)) +
geom_point(aes(color = color),
size = 1) +
theme_minimal() +
scale_color_identity() +
theme(strip.text = element_blank(),
axis.line = element_line("black", size = .5),
legend.position = "none",
panel.spacing = unit(.75, "lines"),
axis.text = element_text(colour = "black")) +
labs(x = "") +
geom_pointrange(alpha = 0.05,
size = .6,
fatten = 1) +
geom_hline(yintercept = 1, linetype = "dashed", color = "black") +
labs(x = "", y = "standarized coefficient") +
coord_fixed(ratio = 2000, ylim = c(0.6, 1.4)) +
theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black")) +
ggsave(file=file_path_eps, device = 'eps')+
ggsave(file=file_path_pdf, device = 'pdf')+
ggsave(file=file_path_png, device = 'png')
}
}
# define function to plot all curves in list
plot_all_curves <- function(ls, analysis, hr=F){
pers <- c('o', 'c', 'e', 'a', 'n')
filenames <- as.list(paste0(analysis, '_', pers))
hr <- as.list(rep(hr, 5))
pmap(list(ls, filenames, hr), plot_curves)
}
# define function to calculate summary stats for single specification curve
calc_summary <- function(df){
dft <- df %>% select(estimate:fit_nobs)
dft <- dft %>% mutate(significant = as.numeric(p.value<0.05),
positive = as.numeric(statistic>0),
negative = as.numeric(statistic<0),
significant_positive = as.numeric(p.value<0.05 & statistic>0),
significant_negative = as.numeric(p.value<0.05 & statistic<0))
mean_temp <- dft %>% map_if(is.numeric, mean, .else=NULL) %>% as.data.frame()
sd_temp <- dft %>% map_if(is.numeric, sd, .else=NULL) %>% as.data.frame()
df_temp <- rbind(mean_temp, sd_temp)
row.names(df_temp) <- c('mean', 'sd')
return(df_temp)
}
# define function to calculate summary stats for all curves in list
calc_all_summaries <- function(ls){
ls <- ls %>% map(calc_summary)
names(ls) <- c('pers_o', 'pers_c', 'pers_e', 'pers_a', 'pers_n')
return(ls)
}
coef_to_hr <- function(df){
df %>% mutate(conf.low = exp(estimate-1.96*std.error),
conf.high = exp(estimate+1.96*std.error),
estimate = exp(estimate))
}
filter_socdist <- function(df){
df %>% filter(str_detect(controls, 'onset_prev') &
str_detect(controls, 'slope_prev'))
}
covariates <- c("airport_dist", "conservative", 'male', 'age', 'popdens',
'manufact', 'tourism', 'academics', 'medinc', 'healthcare',
'temp')
cox_model <- function(formula, data){
formula <- as.formula(formula)
coxph(formula = formula, data = data)}
cox_onset_prev <- spec_calculate(df = df_us_prev_scaled,
y = "Surv(onset_prev, event)",
model = "cox_model",
controls = covariates %>% append('onset_prev_lag'),
all.comb = T)
cox_onset_prev_hr <- cox_onset_prev %>% map(coef_to_hr)
calc_all_summaries(cox_onset_prev_hr)
## $pers_o
## estimate std.error statistic p.value conf.low conf.high fit_n
## mean 1.16172145 0.026014376 5.766790 0.007433825 1.10404476 1.22241719 2366
## sd 0.06206793 0.001120517 2.218639 0.040648218 0.06034861 0.06388357 0
## fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean 1917 638.2887 2.434685e-35 846.4471
## sd 0 136.8117 1.056543e-33 203.5470
## fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean 1.186354e-35 769.6777 9.006769e-36 NA
## sd 4.978832e-34 172.8896 3.775740e-34 NA
## fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean NA 0.23515525 0.9999908 0.68063283
## sd NA 0.04511866 0.0000000 0.01744978
## fit_std.error.concordance fit_logLik fit_AIC fit_BIC fit_nobs
## mean 0.0065112174 -13403.98321 26821.9664 26860.8760 2366
## sd 0.0002305437 68.40587 134.2635 127.4085 0
## significant positive negative significant_positive significant_negative
## mean 0.9633789 1 0 0.9633789 0
## sd 0.1878526 0 0 0.1878526 0
##
## $pers_c
## estimate std.error statistic p.value conf.low conf.high fit_n
## mean 1.14595693 0.023675413 5.720862 0.001312684 1.09398662 1.20039740 2366
## sd 0.04204126 0.000533867 1.536933 0.009826545 0.03991021 0.04431706 0
## fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean 1917 634.9587 6.334368e-13 836.1058
## sd 0 158.0395 4.053783e-11 223.5635
## fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean 5.061211e-13 753.4356 4.799701e-13 NA
## sd 3.238962e-11 188.4106 3.071593e-11 NA
## fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean NA 0.23363277 0.9999908 0.67911602
## sd NA 0.05275286 0.0000000 0.02191225
## fit_std.error.concordance fit_logLik fit_AIC fit_BIC fit_nobs
## mean 0.0064685052 -13405.64817 26825.2963 26864.2060 2366
## sd 0.0002280484 79.01976 155.5847 148.9729 0
## significant positive negative significant_positive significant_negative
## mean 0.99414062 1 0 0.99414062 0
## sd 0.07633129 0 0 0.07633129 0
##
## $pers_e
## estimate std.error statistic p.value conf.low conf.high fit_n
## mean 1.0053968 0.0230413250 0.2407622 0.5618723 0.96101875 1.05182497 2366
## sd 0.0210153 0.0004509163 0.9189734 0.2847874 0.02088586 0.02111908 0
## fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean 1917 601.0713 3.043490e-07 811.6531
## sd 0 156.6765 1.947733e-05 226.5546
## fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean 3.236460e-07 731.9152 3.223853e-07 NA
## sd 2.071186e-05 193.9189 2.063117e-05 NA
## fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean NA 0.22261503 0.9999908 0.67472760
## sd NA 0.05272031 0.0000000 0.02245513
## fit_std.error.concordance fit_logLik fit_AIC fit_BIC fit_nobs
## mean 0.0065180540 -13422.59188 26859.1838 26898.0934 2366
## sd 0.0002496992 78.33827 154.1384 147.2827 0
## significant positive negative significant_positive significant_negative
## mean 0.06713867 0.5056152 0.4943848 0.06713867 0
## sd 0.25029256 0.5000295 0.5000295 0.25029256 0
##
## $pers_a
## estimate std.error statistic p.value conf.low conf.high fit_n
## mean 1.12712077 0.0241346132 4.911333 0.02630132 1.07503821 1.18172942 2366
## sd 0.05222035 0.0007992203 1.927298 0.11581101 0.04966844 0.05496148 0
## fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean 1917 627.6168 7.129527e-12 832.9647
## sd 0 157.1791 4.562814e-10 223.8587
## fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean 5.323420e-12 751.675 5.166855e-12 NA
## sd 3.406905e-10 190.272 3.306702e-10 NA
## fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean NA 0.23126981 0.9999908 0.67999298
## sd NA 0.05263276 0.0000000 0.02169922
## fit_std.error.concordance fit_logLik fit_AIC fit_BIC fit_nobs
## mean 0.0064707427 -13409.31914 26832.6383 26871.5479 2366
## sd 0.0002249616 78.58953 154.7361 148.1604 0
## significant positive negative significant_positive
## mean 0.9313965 0.99462891 0.005371094 0.9313965
## sd 0.2528096 0.07309959 0.073099587 0.2528096
## significant_negative
## mean 0
## sd 0
##
## $pers_n
## estimate std.error statistic p.value conf.low conf.high fit_n
## mean 0.87994919 0.0254283747 -5.139354 0.006040369 0.83711030 0.92498283 2366
## sd 0.04229444 0.0008502208 2.109454 0.019840097 0.03905764 0.04576956 0
## fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean 1917 631.3160 1.042710e-34 830.4507
## sd 0 135.0754 6.242899e-33 207.2895
## fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean 8.175495e-34 746.0484 4.523746e-34 NA
## sd 4.972358e-32 170.8610 2.735136e-32 NA
## fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean NA 0.23293232 0.9999908 0.67921943
## sd NA 0.04458783 0.0000000 0.01770616
## fit_std.error.concordance fit_logLik fit_AIC fit_BIC fit_nobs
## mean 0.0064412561 -13407.4695 26828.939 26867.8487 2366
## sd 0.0001869198 67.5377 132.492 125.5353 0
## significant positive negative significant_positive significant_negative
## mean 0.9619141 0 1 0 0.9619141
## sd 0.1914271 0 0 0 0.1914271
plot_all_curves(cox_onset_prev_hr, 'cox_onset_prev_hr', hr = T)
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
cox_onset_prev_ctrl <- coxph(Surv(onset_prev, event) ~ airport_dist +
conservative + male + age + popdens + manufact +
tourism + academics + medinc + healthcare +
temp + onset_prev_lag,
data = df_us_prev_scaled)
summary(cox_onset_prev_ctrl)
## Call:
## coxph(formula = Surv(onset_prev, event) ~ airport_dist + conservative +
## male + age + popdens + manufact + tourism + academics + medinc +
## healthcare + temp + onset_prev_lag, data = df_us_prev_scaled)
##
## n= 2366, number of events= 1917
##
## coef exp(coef) se(coef) z Pr(>|z|)
## airport_dist -0.06929 0.93306 0.02959 -2.341 0.019219 *
## conservative -0.26778 0.76508 0.02758 -9.709 < 2e-16 ***
## male -0.10249 0.90259 0.02677 -3.828 0.000129 ***
## age -0.11380 0.89244 0.02439 -4.666 3.07e-06 ***
## popdens 0.03785 1.03857 0.02364 1.601 0.109371
## manufact 0.10299 1.10848 0.02806 3.671 0.000242 ***
## tourism 0.05239 1.05379 0.02662 1.968 0.049029 *
## academics 0.17781 1.19460 0.04482 3.968 7.26e-05 ***
## medinc 0.33067 1.39190 0.03620 9.134 < 2e-16 ***
## healthcare 0.11392 1.12067 0.02693 4.230 2.34e-05 ***
## temp 0.32833 1.38865 0.03005 10.926 < 2e-16 ***
## onset_prev_lag -0.31413 0.73042 0.04236 -7.416 1.21e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## airport_dist 0.9331 1.0717 0.8805 0.9888
## conservative 0.7651 1.3071 0.7248 0.8076
## male 0.9026 1.1079 0.8565 0.9512
## age 0.8924 1.1205 0.8508 0.9361
## popdens 1.0386 0.9629 0.9916 1.0878
## manufact 1.1085 0.9021 1.0492 1.1712
## tourism 1.0538 0.9490 1.0002 1.1102
## academics 1.1946 0.8371 1.0941 1.3043
## medinc 1.3919 0.7184 1.2966 1.4942
## healthcare 1.1207 0.8923 1.0630 1.1814
## temp 1.3886 0.7201 1.3092 1.4729
## onset_prev_lag 0.7304 1.3691 0.6722 0.7937
##
## Concordance= 0.709 (se = 0.006 )
## Likelihood ratio test= 896.6 on 12 df, p=<2e-16
## Wald test = 1059 on 12 df, p=<2e-16
## Score (logrank) test = 1203 on 12 df, p=<2e-16
# fixed time windows
lm_slope_prev <- spec_calculate(df = df_us_prev_scaled,
y = "slope_prev",
model = "lm",
controls = covariates %>% append('slope_prev_lag'),
all.comb = T)
calc_all_summaries(lm_slope_prev)
## $pers_o
## estimate std.error statistic p.value conf.low conf.high
## mean 0.06650710 0.0218723943 3.051989 0.1060491 0.02361598 0.1093982
## sd 0.03848726 0.0007661755 1.803760 0.2188002 0.03867878 0.0383537
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.16877711 0.16632951 0.9128307 70.25183 1.477756e-24
## sd 0.03743269 0.03708676 0.0202677 14.16632 5.895233e-23
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3136.82893 6291.6579 6343.57847 1965.8421 2358.000000
## sd 1.732262 53.08913 103.6386 96.63557 88.5283 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.7268066 0.9838867 0.01611328 0.7268066
## sd 0 0.4456537 0.1259266 0.12592662 0.4456537
## significant_negative
## mean 0
## sd 0
##
## $pers_c
## estimate std.error statistic p.value conf.low conf.high
## mean 0.08159129 0.0195674938 4.182360 0.01737430 0.04322001 0.11996257
## sd 0.02905970 0.0004474735 1.515199 0.06422916 0.02934306 0.02880029
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.17120959 0.16877025 0.91146684 71.33624 8.942856e-10
## sd 0.03958576 0.03924605 0.02140208 15.26681 4.423700e-08
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3133.22547 6284.4509 6336.3715 1960.08931 2358.000000
## sd 1.732262 55.91484 109.2984 102.3039 93.62033 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.9228516 1 0 0.9228516
## sd 0 0.2668594 0 0 0.2668594
## significant_negative
## mean 0
## sd 0
##
## $pers_e
## estimate std.error statistic p.value conf.low conf.high
## mean 0.00674942 0.0191251166 0.3431732 0.6112414 -0.03075437 0.04425321
## sd 0.01393104 0.0004257969 0.7179348 0.2737243 0.01343339 0.01445982
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.16442606 0.16196752 0.91517150 67.74067 6.606324e-07
## sd 0.04118872 0.04084978 0.02221979 15.14588 3.531075e-05
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3142.77640 6303.5528 6355.4734 1976.13237 2358.000000
## sd 1.732262 57.88175 113.2271 106.2052 97.41132 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.02978516 0.6547852 0.3452148 0.02978516
## sd 0 0.17001487 0.4754963 0.4754963 0.17001487
## significant_negative
## mean 0
## sd 0
##
## $pers_a
## estimate std.error statistic p.value conf.low conf.high
## mean 0.11095509 0.0199702382 5.583480 0.006947667 0.07179404 0.15011614
## sd 0.03633964 0.0005199555 1.888186 0.042481328 0.03686347 0.03583717
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.17639488 0.17397035 0.90861824 74.12927 4.806967e-13
## sd 0.03882516 0.03848915 0.02103938 15.64261 2.739705e-11
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3125.83790 6269.6758 6321.5964 1947.82611 2358.000000
## sd 1.732262 55.10243 107.6935 100.7668 91.82149 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.9714355 1 0 0.9714355
## sd 0 0.1665992 0 0 0.1665992
## significant_negative
## mean 0
## sd 0
##
## $pers_n
## estimate std.error statistic p.value conf.low conf.high
## mean -0.06960205 0.0203062105 -3.442200 0.1089957 -0.10942193 -0.02978217
## sd 0.03665124 0.0004166468 1.838027 0.2464262 0.03637535 0.03694315
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.16969364 0.16724876 0.91232268 70.72756 9.620287e-22
## sd 0.03779965 0.03745861 0.02046116 14.60225 5.487729e-20
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3135.5009 6289.0018 6340.92240 1963.67454 2358.000000
## sd 1.732262 53.5602 104.6036 97.66964 89.39616 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.8081055 0.02587891 0.9741211 0
## sd 0 0.3938387 0.15879340 0.1587934 0
## significant_negative
## mean 0.8081055
## sd 0.3938387
plot_all_curves(lm_slope_prev, 'lm_slope_prev')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
lm_slope_prev_ctrl <- lm(slope_prev ~ airport_dist +
conservative + male + age + popdens + manufact +
tourism + academics + medinc + healthcare +
temp + slope_prev_lag,
data = df_us_prev_scaled)
summary(lm_slope_prev_ctrl)
##
## Call:
## lm(formula = slope_prev ~ airport_dist + conservative + male +
## age + popdens + manufact + tourism + academics + medinc +
## healthcare + temp + slope_prev_lag, data = df_us_prev_scaled)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.9236 -0.4960 -0.2000 0.2154 6.0413
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.003469 0.017857 -0.194 0.84600
## airport_dist 0.008823 0.021113 0.418 0.67606
## conservative -0.247126 0.022130 -11.167 < 2e-16 ***
## male -0.080222 0.019710 -4.070 4.85e-05 ***
## age -0.052478 0.019239 -2.728 0.00642 **
## popdens 0.139040 0.020596 6.751 1.85e-11 ***
## manufact 0.006652 0.022098 0.301 0.76342
## tourism -0.008209 0.020752 -0.396 0.69245
## academics -0.084176 0.038229 -2.202 0.02777 *
## medinc 0.263901 0.030260 8.721 < 2e-16 ***
## healthcare 0.041049 0.023823 1.723 0.08500 .
## temp 0.219069 0.023035 9.510 < 2e-16 ***
## slope_prev_lag 0.303142 0.031452 9.638 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8684 on 2353 degrees of freedom
## Multiple R-squared: 0.2497, Adjusted R-squared: 0.2458
## F-statistic: 65.25 on 12 and 2353 DF, p-value: < 2.2e-16
# variable time windows
lm_slope_prev_var <- spec_calculate(df = df_us_prev_scaled,
y = "slope_prev_var",
model = "lm",
controls = covariates %>% append('slope_prev_var_lag'),
all.comb = T)
calc_all_summaries(lm_slope_prev_var)
## $pers_o
## estimate std.error statistic p.value conf.low conf.high
## mean 0.01843360 0.0228448236 0.8092564 0.3374739 -0.02636443 0.06323163
## sd 0.03179557 0.0008021976 1.4119922 0.2928829 0.03186131 0.03180759
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.09368869 0.09101223 0.95330359 35.311428 4.358225e-09
## sd 0.02737264 0.02697465 0.01414535 8.558473 1.601068e-07
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3239.79451 6497.58901 6549.50962 2143.4263 2358.000000
## sd 1.732262 35.73047 69.01074 62.48346 64.7363 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.2021484 0.7170410 0.2829590 0.1867676
## sd 0 0.4016514 0.4504917 0.4504917 0.3897724
## significant_negative
## mean 0.01538086
## sd 0.12307716
##
## $pers_c
## estimate std.error statistic p.value conf.low conf.high
## mean 0.09199460 0.0203745098 4.529303 0.003265235 0.05204079 0.13194841
## sd 0.02635443 0.0003465441 1.337233 0.009768975 0.02675594 0.02596451
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.10121874 0.09856375 0.94935036 38.710210 2.822507e-11
## sd 0.02521521 0.02480452 0.01304579 8.221666 1.221468e-09
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3230.00003 6478.00006 6529.92067 2125.61769 2358.000000
## sd 1.732262 33.10894 63.71753 57.07268 59.63398 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.9880371 1 0 0.9880371
## sd 0 0.1087321 0 0 0.1087321
## significant_negative
## mean 0
## sd 0
##
## $pers_e
## estimate std.error statistic p.value conf.low conf.high
## mean -0.007067026 0.0199332406 -0.3585409 0.6301136 -0.046155523 0.03202147
## sd 0.010169162 0.0002821194 0.5126193 0.2278677 0.009878615 0.01048088
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.09279622 0.09011762 0.95376562 34.846983 0.0001193256
## sd 0.02829475 0.02789763 0.01461407 8.898946 0.0044740979
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3240.92388 6499.84776 6551.76837 2145.53694 2358.000000
## sd 1.732262 36.85796 71.26061 64.69776 66.91708 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0 0.2226562 0.7773438 0
## sd 0 0 0.4160802 0.4160802 0
## significant_negative
## mean 0
## sd 0
##
## $pers_a
## estimate std.error statistic p.value conf.low conf.high
## mean 0.11504837 0.0208132613 5.555493 0.00127825 0.07423418 0.15586257
## sd 0.03195153 0.0004849158 1.619015 0.00604934 0.03262194 0.03129565
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.10543597 0.10279285 0.94712809 40.680990 8.095085e-15
## sd 0.02407654 0.02366633 0.01247244 8.311559 3.982934e-13
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3224.47345 6466.94691 6518.86751 2115.64393 2358.000000
## sd 1.732262 31.74214 60.99344 54.40905 56.94102 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.99584961 1 0 0.99584961
## sd 0 0.06429754 0 0 0.06429754
## significant_negative
## mean 0
## sd 0
##
## $pers_n
## estimate std.error statistic p.value conf.low conf.high
## mean -0.04812700 0.0211994748 -2.285890 0.1692657 -0.08969855 -0.006555458
## sd 0.03063361 0.0004145531 1.471557 0.2877701 0.03022191 0.031061141
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.09553382 0.09286226 0.95234100 36.196044 9.356756e-12
## sd 0.02627732 0.02587729 0.01357951 8.357229 3.360019e-10
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3237.42430 6492.84860 6544.76920 2139.06251 2358.000000
## sd 1.732262 34.34926 66.24897 59.74882 62.14587 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.6511230 0.1003418 0.8996582 0
## sd 0 0.4766732 0.3004919 0.3004919 0
## significant_negative
## mean 0.6511230
## sd 0.4766732
plot_all_curves(lm_slope_prev_var, 'lm_slope_prev_var')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
lm_slope_prev_var_ctrl <- lm(slope_prev_var ~ airport_dist +
conservative + male + age + popdens + manufact +
tourism + academics + medinc + healthcare +
temp + slope_prev_var_lag,
data = df_us_prev_scaled)
summary(lm_slope_prev_var_ctrl)
##
## Call:
## lm(formula = slope_prev_var ~ airport_dist + conservative + male +
## age + popdens + manufact + tourism + academics + medinc +
## healthcare + temp + slope_prev_var_lag, data = df_us_prev_scaled)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1372 -0.5606 -0.2239 0.3000 9.4699
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.002831 0.018948 -0.149 0.881224
## airport_dist 0.021432 0.022402 0.957 0.338819
## conservative -0.223392 0.023463 -9.521 < 2e-16 ***
## male -0.070245 0.020914 -3.359 0.000795 ***
## age -0.060962 0.020410 -2.987 0.002847 **
## popdens 0.111035 0.021855 5.081 4.06e-07 ***
## manufact 0.037805 0.023416 1.614 0.106557
## tourism -0.026901 0.022017 -1.222 0.221893
## academics -0.131707 0.040554 -3.248 0.001180 **
## medinc 0.213479 0.032092 6.652 3.58e-11 ***
## healthcare 0.032853 0.025277 1.300 0.193814
## temp 0.222065 0.024472 9.074 < 2e-16 ***
## slope_prev_var_lag 0.235435 0.034511 6.822 1.14e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9214 on 2353 degrees of freedom
## Multiple R-squared: 0.1553, Adjusted R-squared: 0.151
## F-statistic: 36.04 on 12 and 2353 DF, p-value: < 2.2e-16
lm_cases_0415 <- spec_calculate(df = df_us_death_scaled,
y = "case_rate_0415",
model = "lm",
controls = covariates %>% append('case_rate_0415_lag'),
all.comb = T)
calc_all_summaries(lm_cases_0415)
## $pers_o
## estimate std.error statistic p.value conf.low conf.high
## mean 0.04876701 0.0227720892 2.150331 0.1735715 0.004111615 0.09342241
## sd 0.03165959 0.0007998025 1.423872 0.2427764 0.031873299 0.03152256
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.09943230 0.09677125 0.95027531 38.13421 1.473910e-15
## sd 0.02766227 0.02732592 0.01433976 10.44299 4.683503e-14
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3232.25867 6482.51733 6534.43794 2129.84261 2358.000000
## sd 1.732262 36.16042 70.17978 64.65003 65.42127 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.4975586 0.9543457 0.0456543 0.4975586
## sd 0 0.5000551 0.2087597 0.2087597 0.5000551
## significant_negative
## mean 0
## sd 0
##
## $pers_c
## estimate std.error statistic p.value conf.low conf.high
## mean 0.04262502 0.0203947736 2.104424 0.1466779 0.002631474 0.08261857
## sd 0.02066561 0.0004616577 1.037216 0.2339358 0.021205973 0.02015145
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.09892688 0.09626521 0.95052224 37.78673 2.060162e-05
## sd 0.03004164 0.02971634 0.01556665 11.45476 9.415328e-04
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3232.82626 6483.65252 6535.57312 2131.03793 2358.000000
## sd 1.732262 39.13348 76.15729 70.67325 71.04847 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.6054688 0.9758301 0.02416992 0.6054688
## sd 0 0.4888094 0.1535952 0.15359524 0.4888094
## significant_negative
## mean 0
## sd 0
##
## $pers_e
## estimate std.error statistic p.value conf.low conf.high
## mean 0.012073121 0.0198842257 0.6045850 0.5663104 -0.026919260 0.051065502
## sd 0.009551023 0.0003069414 0.4745963 0.2407368 0.009344157 0.009790576
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.09705206 0.09438499 0.95151078 36.93931 0.0000119453
## sd 0.03006737 0.02973723 0.01556634 11.25001 0.0005722379
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3235.28615 6488.5723 6540.49291 2135.47188 2358.000000
## sd 1.732262 39.11061 76.0876 70.52753 71.10934 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.01464844 0.8945312 0.1054688 0.01464844
## sd 0 0.12015567 0.3071940 0.3071940 0.12015567
## significant_negative
## mean 0
## sd 0
##
## $pers_a
## estimate std.error statistic p.value conf.low conf.high
## mean 0.07795076 0.020834282 3.766163 0.02775633 0.03709534 0.11880617
## sd 0.02637373 0.000592395 1.323556 0.09913529 0.02709396 0.02568587
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.1028759 0.1002257 0.94843600 39.58762 1.825308e-07
## sd 0.0300417 0.0297263 0.01560479 11.81480 7.034892e-06
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3227.62466 6473.24933 6525.16993 2121.69844 2358.000000
## sd 1.732262 39.29875 76.53274 71.18872 71.04863 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.9033203 0.99951172 0.0004882812 0.9033203
## sd 0 0.2955572 0.02209439 0.0220943887 0.2955572
## significant_negative
## mean 0
## sd 0
##
## $pers_n
## estimate std.error statistic p.value conf.low conf.high
## mean -0.04774835 0.0211530060 -2.276129 0.1478689 -0.08922878 -0.006267932
## sd 0.02663904 0.0004849494 1.291895 0.2612650 0.02607382 0.027225747
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.09947116 0.09681049 0.9502443 38.13183 2.304518e-12
## sd 0.02894177 0.02861880 0.0150099 11.11191 1.129695e-10
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3232.1560 6482.31207 6534.233 2129.75070 2358.000000
## sd 1.732262 37.7909 73.49507 68.108 68.44729 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.6376953 0.05932617 0.9406738 0
## sd 0 0.4807249 0.23626300 0.2362630 0
## significant_negative
## mean 0.6376953
## sd 0.4807249
plot_all_curves(lm_cases_0415, 'lm_cases_0415')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
lm_cases_0415_ctrl <- lm(case_rate_0415 ~ airport_dist +
conservative + male + age + popdens + manufact +
tourism + academics + medinc + healthcare +
temp + case_rate_0415_lag,
data = df_us_death_scaled)
summary(lm_cases_0415_ctrl)
##
## Call:
## lm(formula = case_rate_0415 ~ airport_dist + conservative + male +
## age + popdens + manufact + tourism + academics + medinc +
## healthcare + temp + case_rate_0415_lag, data = df_us_death_scaled)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.9724 -0.3185 -0.1425 0.0664 14.0442
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.002321 0.018957 -0.122 0.902545
## airport_dist 0.039999 0.022414 1.785 0.074469 .
## conservative -0.192255 0.023487 -8.186 4.38e-16 ***
## male -0.054249 0.020924 -2.593 0.009582 **
## age -0.015480 0.020419 -0.758 0.448449
## popdens 0.197538 0.021860 9.037 < 2e-16 ***
## manufact -0.014696 0.023468 -0.626 0.531255
## tourism -0.015331 0.022028 -0.696 0.486509
## academics -0.148486 0.040571 -3.660 0.000258 ***
## medinc 0.242370 0.032107 7.549 6.24e-14 ***
## healthcare 0.034985 0.025290 1.383 0.166693
## temp 0.155413 0.024405 6.368 2.29e-10 ***
## case_rate_0415_lag 0.187843 0.034440 5.454 5.43e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9219 on 2353 degrees of freedom
## Multiple R-squared: 0.1545, Adjusted R-squared: 0.1502
## F-statistic: 35.83 on 12 and 2353 DF, p-value: < 2.2e-16
lm_cases_0930 <- spec_calculate(df = df_us_death_scaled,
y = "case_rate_0930",
model = "lm",
controls = covariates %>% append('case_rate_0930_lag'),
all.comb = T)
calc_all_summaries(lm_cases_0930)
## $pers_o
## estimate std.error statistic p.value conf.low conf.high
## mean -0.08748817 0.020625280 -4.336426 0.06828373 -0.12793374 -0.04704261
## sd 0.04752857 0.001548285 2.489699 0.17839331 0.04636929 0.04884932
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.2576031 0.2554435 0.86051765 125.08352 1.479292e-08
## sd 0.1115422 0.1115596 0.06377168 61.40988 4.197068e-07
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -2991.3823 6000.765 6052.6852 1755.7688 2358.000000
## sd 1.732262 173.8364 345.775 340.4391 263.7973 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.8127441 0.01391602 0.9860840 0
## sd 0 0.3901644 0.11715678 0.1171568 0
## significant_negative
## mean 0.8127441
## sd 0.3901644
##
## $pers_c
## estimate std.error statistic p.value conf.low conf.high
## mean 0.12308312 0.0184266866 6.583651 0.001060154 0.08694894 0.1592173
## sd 0.05683318 0.0009247264 2.811747 0.004025125 0.05550506 0.0581875
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.26752476 0.26538940 0.85530837 130.36518 3.170247e-32
## sd 0.09585838 0.09583076 0.05530759 53.15408 6.694621e-31
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -2978.5255 5975.0511 6026.9717 1732.3039 2358.000000
## sd 1.732262 152.1392 302.3326 296.8751 226.7051 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 1 1 0 1
## sd 0 0 0 0 0
## significant_negative
## mean 0
## sd 0
##
## $pers_e
## estimate std.error statistic p.value conf.low conf.high
## mean 0.06069173 0.018035486 3.3280702 0.01604293 0.02532467 0.09605878
## sd 0.01982743 0.001245754 0.9305813 0.04801242 0.01817235 0.02163228
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.2542249 0.2520547 0.8626706 122.18282 5.151662e-06
## sd 0.1072672 0.1072635 0.0612009 57.77144 9.231320e-05
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -2997.8262 6013.6525 6065.5731 1763.7582 2358.000000
## sd 1.732262 166.5089 331.0699 325.5924 253.6869 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.9135742 1 0 0.9135742
## sd 0 0.2810261 0 0 0.2810261
## significant_negative
## mean 0
## sd 0
##
## $pers_a
## estimate std.error statistic p.value conf.low conf.high
## mean 0.20856723 0.018548064 11.178947 1.455611e-10 0.17219502 0.24493944
## sd 0.06258162 0.000809816 3.068922 1.114284e-09 0.06164063 0.06354836
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.29257675 0.29051179 0.84079681 147.40723 4.922386e-58
## sd 0.08593704 0.08589113 0.05049305 51.53961 1.583845e-56
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -2938.7056 5895.4112 5947.3318 1673.0560 2358.000000
## sd 1.732262 141.4434 280.9384 275.4853 203.2411 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 1 1 0 1
## sd 0 0 0 0 0
## significant_negative
## mean 0
## sd 0
##
## $pers_n
## estimate std.error statistic p.value conf.low conf.high
## mean -0.18517486 0.018911721 -9.704595 1.720658e-06 -0.22226019 -0.14808953
## sd 0.05329514 0.001037646 2.400281 2.810106e-05 0.05486468 0.05175801
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.28204011 0.27994829 0.84680190 139.91098 8.135163e-27
## sd 0.09375431 0.09371317 0.05458002 53.56275 2.424387e-25
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -2954.9142 5927.8285 5979.7491 1697.9751 2358.000000
## sd 1.732262 151.5238 301.0411 295.4047 221.7289 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 1 0 1 0
## sd 0 0 0 0 0
## significant_negative
## mean 1
## sd 0
plot_all_curves(lm_cases_0930, 'lm_cases_0930')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
lm_cases_0930_ctrl <- lm(case_rate_0930 ~ airport_dist +
conservative + male + age + popdens + manufact +
tourism + academics + medinc + healthcare +
temp + case_rate_0930_lag,
data = df_us_death_scaled)
summary(lm_cases_0930_ctrl)
##
## Call:
## lm(formula = case_rate_0930 ~ airport_dist + conservative + male +
## age + popdens + manufact + tourism + academics + medinc +
## healthcare + temp + case_rate_0930_lag, data = df_us_death_scaled)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5977 -0.4787 -0.1204 0.3630 6.7757
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.001975 0.015604 -0.127 0.89929
## airport_dist 0.119021 0.018459 6.448 1.37e-10 ***
## conservative -0.214833 0.019372 -11.090 < 2e-16 ***
## male 0.152308 0.017222 8.844 < 2e-16 ***
## age -0.293421 0.016809 -17.457 < 2e-16 ***
## popdens 0.009099 0.017999 0.506 0.61323
## manufact 0.173597 0.019327 8.982 < 2e-16 ***
## tourism -0.021363 0.018148 -1.177 0.23923
## academics -0.107078 0.033555 -3.191 0.00144 **
## medinc -0.025005 0.026524 -0.943 0.34592
## healthcare 0.066311 0.020818 3.185 0.00147 **
## temp 0.483630 0.021645 22.344 < 2e-16 ***
## case_rate_0930_lag 0.226178 0.028937 7.816 8.14e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7589 on 2353 degrees of freedom
## Multiple R-squared: 0.427, Adjusted R-squared: 0.4241
## F-statistic: 146.1 on 12 and 2353 DF, p-value: < 2.2e-16
lm_deaths_0415 <- spec_calculate(df = df_us_death_scaled,
y = "death_rate_0415",
model = "lm",
controls = covariates %>% append('death_rate_0415_lag'),
all.comb = T)
calc_all_summaries(lm_deaths_0415)
## $pers_o
## estimate std.error statistic p.value conf.low conf.high
## mean 0.02159386 0.0232443310 0.9352266 0.3927249 -0.02398759 0.06717531
## sd 0.02622739 0.0008315074 1.1494376 0.3084098 0.02646229 0.02609246
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.06227496 0.05950046 0.969742909 22.662019 8.001974e-08
## sd 0.01965118 0.01924889 0.009912406 6.298663 2.447263e-06
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3280.3844 6578.76886 6630.68946 2217.71972 2358.000000
## sd 1.732262 24.7346 47.20338 41.60315 46.47504 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.1784668 0.7832031 0.2167969 0.1784668
## sd 0 0.3829520 0.4121134 0.4121134 0.3829520
## significant_negative
## mean 0
## sd 0
##
## $pers_c
## estimate std.error statistic p.value conf.low conf.high
## mean 0.04372995 0.0207956843 2.1162652 0.1315361 0.002950226 0.08450968
## sd 0.01888260 0.0004178388 0.9341132 0.2095286 0.019457170 0.01832666
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.06352029 0.06074950 0.96909709 23.161227 6.476435e-06
## sd 0.01994210 0.01954641 0.01006702 6.585468 2.596939e-04
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3278.80427 6575.60854 6627.52914 2214.77451 2358.000000
## sd 1.732262 25.10753 47.97863 42.46171 47.16306 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.6140137 0.99804688 0.001953125 0.6140137
## sd 0 0.4868868 0.04415638 0.044156385 0.4868868
## significant_negative
## mean 0
## sd 0
##
## $pers_e
## estimate std.error statistic p.value conf.low conf.high
## mean -0.009297587 0.0202759542 -0.4597789 0.6280569 -0.049058136 0.03046296
## sd 0.007132299 0.0002130225 0.3530586 0.1908608 0.007010183 0.00727638
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.06151788 0.05874158 0.9701292 22.284747 0.0002391457
## sd 0.02056854 0.02016790 0.0103773 6.638708 0.0091204303
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3281.31517 6580.63034 6632.55094 2219.51021 2358.000000
## sd 1.732262 25.84772 49.42726 43.77151 48.64461 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0 0.08740234 0.9125977 0
## sd 0 0 0.28245823 0.2824582 0
## significant_negative
## mean 0
## sd 0
##
## $pers_a
## estimate std.error statistic p.value conf.low conf.high
## mean 0.08059878 0.0212479128 3.816159 0.01274672 0.03893225 0.12226531
## sd 0.02281645 0.0005825912 1.139402 0.04100308 0.02361434 0.02204891
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.06766920 0.06491029 0.966950106 24.935697 1.104777e-08
## sd 0.01948835 0.01910418 0.009860482 6.747553 4.400887e-07
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3273.56134 6565.12267 6617.04328 2204.96235 2358.000000
## sd 1.732262 24.64179 47.10499 41.80496 46.08995 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.9294434 1 0 0.9294434
## sd 0 0.2561141 0 0 0.2561141
## significant_negative
## mean 0
## sd 0
##
## $pers_n
## estimate std.error statistic p.value conf.low conf.high
## mean -0.01275703 0.0215914703 -0.6063945 0.3820066 -0.05509727 0.02958321
## sd 0.02276319 0.0004845586 1.0615056 0.2740656 0.02216699 0.02338281
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.06198511 0.05920994 0.96988961 22.517221 1.683153e-06
## sd 0.02023050 0.01983336 0.01020926 6.553321 3.977900e-05
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3280.73490 6579.46980 6631.3904 2218.40522 2358.000000
## sd 1.732262 25.44329 48.63943 43.0717 47.84512 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.06982422 0.2592773 0.7407227 0
## sd 0 0.25488165 0.4382916 0.4382916 0
## significant_negative
## mean 0.06982422
## sd 0.25488165
plot_all_curves(lm_deaths_0415, 'lm_deaths_0415')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
lm_deaths_0415_ctrl <- lm(death_rate_0415 ~ airport_dist +
conservative + male + age + popdens + manufact +
tourism + academics + medinc + healthcare +
temp + death_rate_0415_lag,
data = df_us_death_scaled)
summary(lm_deaths_0415_ctrl)
##
## Call:
## lm(formula = death_rate_0415 ~ airport_dist + conservative +
## male + age + popdens + manufact + tourism + academics + medinc +
## healthcare + temp + death_rate_0415_lag, data = df_us_death_scaled)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.9826 -0.3573 -0.1845 0.0510 14.3516
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.002784 0.019506 -0.143 0.88650
## airport_dist 0.030827 0.023066 1.336 0.18153
## conservative -0.164472 0.024153 -6.809 1.24e-11 ***
## male -0.058699 0.021529 -2.727 0.00645 **
## age 0.005348 0.021010 0.255 0.79911
## popdens 0.149665 0.022492 6.654 3.53e-11 ***
## manufact 0.001234 0.024105 0.051 0.95919
## tourism -0.038348 0.022667 -1.692 0.09082 .
## academics -0.118379 0.041756 -2.835 0.00462 **
## medinc 0.167830 0.033048 5.078 4.11e-07 ***
## healthcare 0.023905 0.026021 0.919 0.35835
## temp 0.152491 0.025113 6.072 1.47e-09 ***
## death_rate_0415_lag 0.227223 0.034577 6.572 6.11e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9486 on 2353 degrees of freedom
## Multiple R-squared: 0.1048, Adjusted R-squared: 0.1002
## F-statistic: 22.96 on 12 and 2353 DF, p-value: < 2.2e-16
lm_deaths_0930 <- spec_calculate(df = df_us_death_scaled,
y = "death_rate_0930",
model = "lm",
controls = covariates %>% append('death_rate_0930_lag'),
all.comb = T)
calc_all_summaries(lm_deaths_0930)
## $pers_o
## estimate std.error statistic p.value conf.low conf.high
## mean -0.04771222 0.021786652 -2.278399 0.2033106 -0.09043520 -0.004989232
## sd 0.04891033 0.001143027 2.365694 0.2823974 0.04739848 0.050476466
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.17340678 0.17098490 0.90938022 73.76279 0.0003284423
## sd 0.08211462 0.08202695 0.04520183 38.89783 0.0127438710
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3125.5125 6269.0249 6320.9455 1954.8930 2358.000000
## sd 1.732262 118.5453 235.2744 230.2473 194.2011 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.5175781 0.1660156 0.8339844 0.01733398
## sd 0 0.4997519 0.3721401 0.3721401 0.13052845
## significant_negative
## mean 0.5002441
## sd 0.5000610
##
## $pers_c
## estimate std.error statistic p.value conf.low conf.high
## mean 0.1268692 0.019400070 6.486926 0.001113984 0.08882628 0.16491221
## sd 0.0530807 0.000431052 2.614407 0.003196235 0.05230910 0.05385451
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.18772875 0.18534406 0.90182774 81.06360 3.020223e-31
## sd 0.06656885 0.06643439 0.03691875 31.69162 9.761146e-30
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3106.72700 6231.4540 6283.3746 1921.0215 2358.000000
## sd 1.732262 97.68093 193.4857 188.3267 157.4353 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 1 1 0 1
## sd 0 0 0 0 0
## significant_negative
## mean 0
## sd 0
##
## $pers_e
## estimate std.error statistic p.value conf.low conf.high
## mean 0.006814176 0.0190529070 0.3370688 0.5684355 -0.03054801 0.04417636
## sd 0.014399735 0.0008842485 0.7376599 0.2709864 0.01342749 0.01550550
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.17020480 0.16777296 0.91123005 71.84871 0.0002407612
## sd 0.07920734 0.07910241 0.04344308 36.74472 0.0097444833
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3130.5609 6279.1219 6331.0425 1962.4657 2358.000000
## sd 1.732262 113.5638 225.2666 220.1163 187.3254 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.01293945 0.6718750 0.3281250 0.01293945
## sd 0 0.11302718 0.4695879 0.4695879 0.11302718
## significant_negative
## mean 0
## sd 0
##
## $pers_a
## estimate std.error statistic p.value conf.low conf.high
## mean 0.20456071 0.0195666193 10.433477 1.450827e-09 0.16619114 0.24293027
## sd 0.05862264 0.0002714668 2.928455 1.173231e-08 0.05832843 0.05892019
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.21065181 0.20833117 0.88918037 93.99839 4.111013e-56
## sd 0.05703859 0.05688833 0.03205223 29.92537 1.256598e-54
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3073.75974 6165.5195 6217.4401 1866.8085 2358.000000
## sd 1.732262 86.01007 170.1551 165.0649 134.8963 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 1 1 0 1
## sd 0 0 0 0 0
## significant_negative
## mean 0
## sd 0
##
## $pers_n
## estimate std.error statistic p.value conf.low conf.high
## mean -0.09751528 0.0201788237 -4.781985 0.06488922 -0.13708536 -0.05794521
## sd 0.04905087 0.0007283533 2.339181 0.18515988 0.04998104 0.04814510
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.18025226 0.17784851 0.90582179 76.95645 5.561893e-13
## sd 0.07348383 0.07335924 0.04048169 34.39706 2.643324e-11
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 7.000000 -3116.8080 6251.6161 6303.537 1938.7034 2358.000000
## sd 1.732262 106.3505 210.8025 205.555 173.7892 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.8454590 0.01293945 0.9870605 0
## sd 0 0.3615107 0.11302718 0.1130272 0
## significant_negative
## mean 0.8454590
## sd 0.3615107
plot_all_curves(lm_deaths_0930, 'lm_deaths_0930')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
lm_deaths_0930_ctrl <- lm(death_rate_0930 ~ airport_dist +
conservative + male + age + popdens + manufact +
tourism + academics + medinc + healthcare +
temp + death_rate_0930_lag,
data = df_us_death_scaled)
summary(lm_deaths_0930_ctrl)
##
## Call:
## lm(formula = death_rate_0930 ~ airport_dist + conservative +
## male + age + popdens + manufact + tourism + academics + medinc +
## healthcare + temp + death_rate_0930_lag, data = df_us_death_scaled)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9380 -0.5061 -0.1812 0.2849 5.7208
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.001376 0.017228 -0.080 0.93636
## airport_dist 0.094936 0.020382 4.658 3.37e-06 ***
## conservative -0.326855 0.021335 -15.320 < 2e-16 ***
## male -0.078391 0.019013 -4.123 3.87e-05 ***
## age -0.056769 0.018559 -3.059 0.00225 **
## popdens 0.087295 0.019869 4.394 1.16e-05 ***
## manufact 0.036678 0.021292 1.723 0.08509 .
## tourism -0.068205 0.020024 -3.406 0.00067 ***
## academics -0.248127 0.036920 -6.721 2.26e-11 ***
## medinc 0.068018 0.029185 2.331 0.01986 *
## healthcare 0.039224 0.022986 1.706 0.08805 .
## temp 0.453554 0.023106 19.630 < 2e-16 ***
## death_rate_0930_lag 0.171593 0.031177 5.504 4.12e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8379 on 2353 degrees of freedom
## Multiple R-squared: 0.3015, Adjusted R-squared: 0.2979
## F-statistic: 84.64 on 12 and 2353 DF, p-value: < 2.2e-16
cox_onset_socdist <- spec_calculate(df = df_us_socdist_scaled,
y = "Surv(cpt_day_socdist_2, event)",
model = "cox_model",
controls = covariates %>%
append(c('cpt_day_socdist_2_lag', 'onset_prev', 'slope_prev')),
all.comb = T)
cox_onset_socdist <- cox_onset_socdist %>% map(filter_socdist)
cox_onset_socdist_hr <- cox_onset_socdist %>% map(coef_to_hr)
calc_all_summaries(cox_onset_socdist_hr)
## $pers_o
## estimate std.error statistic p.value conf.low conf.high fit_n
## mean 0.90084718 0.0251620611 -4.186946 0.001514851 0.85747908 0.94641117 2366
## sd 0.02327209 0.0008332922 1.134641 0.004378419 0.02144995 0.02529138 0
## fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean 2365 234.86781 5.412147e-22 224.69959
## sd 0 66.65304 5.712933e-21 65.57137
## fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean 1.657834e-20 223.08702 3.23802e-20 NA
## sd 1.946635e-19 64.68873 4.20639e-19 NA
## fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean NA 0.09414009 0.9999987 0.63791890
## sd NA 0.02557470 0.0000000 0.01724889
## fit_std.error.concordance fit_logLik fit_AIC fit_BIC fit_nobs
## mean 0.0071943487 -15894.97671 31807.95342 31859.87022 2366
## sd 0.0002517063 33.32652 64.80125 60.26247 0
## significant positive negative significant_positive significant_negative
## mean 0.99926758 0 1 0 0.99926758
## sd 0.02705668 0 0 0 0.02705668
##
## $pers_c
## estimate std.error statistic p.value conf.low conf.high fit_n
## mean 0.9305632 0.0212930328 -3.439328 0.02976033 0.8924980 0.97025352 2366
## sd 0.0271874 0.0006426558 1.464538 0.04948005 0.0250966 0.02942282 0
## fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean 2365 230.04233 9.389246e-18 220.31793
## sd 0 65.03569 8.934878e-17 63.96218
## fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean 9.055267e-17 218.1110 1.179495e-16 NA
## sd 8.321553e-16 62.7894 1.056573e-15 NA
## fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean NA 0.09230748 0.9999987 0.63684492
## sd NA 0.02505988 0.0000000 0.01686053
## fit_std.error.concordance fit_logLik fit_AIC fit_BIC fit_nobs
## mean 0.0072149160 -15897.38945 31812.77891 31864.69571 2366
## sd 0.0002147792 32.51784 63.01764 57.96853 0
## significant positive negative significant_positive significant_negative
## mean 0.7814941 0 1 0 0.7814941
## sd 0.4132829 0 0 0 0.4132829
##
## $pers_e
## estimate std.error statistic p.value conf.low conf.high fit_n
## mean 1.02426578 0.0213783358 1.1179169 0.3310683 0.98223389 1.0680964 2366
## sd 0.01204374 0.0001072928 0.5488602 0.2332412 0.01152189 0.0125931 0
## fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean 2365 217.59045 4.479032e-13 207.16637
## sd 0 74.34435 3.637662e-12 73.62063
## fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean 4.870524e-12 205.28108 6.898468e-12 NA
## sd 3.612500e-11 72.55058 4.849513e-11 NA
## fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean NA 0.08741196 0.9999987 0.63445954
## sd NA 0.02878782 0.0000000 0.01893728
## fit_std.error.concordance fit_logLik fit_AIC fit_BIC fit_nobs
## mean 0.0072780187 -15903.61539 31825.23079 31877.14759 2366
## sd 0.0002501295 37.17218 72.42525 67.58321 0
## significant positive negative significant_positive significant_negative
## mean 0.03979492 1 0 0.03979492 0
## sd 0.19550094 0 0 0.19550094 0
##
## $pers_a
## estimate std.error statistic p.value conf.low conf.high fit_n
## mean 1.00925711 0.0215170876 0.3206578 0.1308660 0.96752121 1.05279642 2366
## sd 0.04240161 0.0008720833 1.9443295 0.1687364 0.03927003 0.04575063 0
## fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean 2365 219.91522 3.098970e-13 209.08456
## sd 0 74.34997 2.452981e-12 72.81736
## fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean 3.048073e-12 207.31414 4.251963e-12 NA
## sd 2.230823e-11 71.92873 2.986865e-11 NA
## fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean NA 0.08830803 0.9999987 0.63489406
## sd NA 0.02877147 0.0000000 0.01796452
## fit_std.error.concordance fit_logLik fit_AIC fit_BIC fit_nobs
## mean 0.0072595031 -15902.45301 31822.90601 31874.82281 2366
## sd 0.0002430652 37.17499 72.45016 67.66904 0
## significant positive negative significant_positive significant_negative
## mean 0.4101562 0.5031738 0.4968262 0.2971191 0.1130371
## sd 0.4919219 0.5000510 0.5000510 0.4570452 0.3166768
##
## $pers_n
## estimate std.error statistic p.value conf.low conf.high fit_n
## mean 1.07129629 0.0227821273 3.028251 0.05228809 1.02453397 1.12019435 2366
## sd 0.02975165 0.0005711819 1.264375 0.10005770 0.02922361 0.03029421 0
## fit_nevent fit_statistic.log fit_p.value.log fit_statistic.sc
## mean 2365 226.77298 2.340417e-18 216.21052
## sd 0 66.03596 2.269295e-17 64.65633
## fit_p.value.sc fit_statistic.wald fit_p.value.wald fit_statistic.robust
## mean 1.974366e-17 213.98344 2.566388e-17 NA
## sd 1.664079e-16 63.52385 2.065698e-16 NA
## fit_p.value.robust fit_r.squared fit_r.squared.max fit_concordance
## mean NA 0.09104196 0.9999987 0.63592775
## sd NA 0.02545169 0.0000000 0.01772905
## fit_std.error.concordance fit_logLik fit_AIC fit_BIC fit_nobs
## mean 0.0072763743 -15899.02413 31816.04825 31867.96505 2366
## sd 0.0002555284 33.01798 64.09478 59.27922 0
## significant positive negative significant_positive significant_negative
## mean 0.7404785 1 0 0.7404785 0
## sd 0.4384256 0 0 0.4384256 0
plot_all_curves(cox_onset_socdist_hr, 'cox_onset_socdist_hr', hr = T)
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
cox_onset_socdist_ctrl <- coxph(Surv(cpt_day_socdist_2, event) ~ airport_dist +
conservative + male + age + popdens + manufact +
tourism + academics + medinc + healthcare +
temp + onset_prev + slope_prev +
cpt_day_socdist_2_lag,
data = df_us_socdist_scaled)
summary(cox_onset_socdist_ctrl)
## Call:
## coxph(formula = Surv(cpt_day_socdist_2, event) ~ airport_dist +
## conservative + male + age + popdens + manufact + tourism +
## academics + medinc + healthcare + temp + onset_prev + slope_prev +
## cpt_day_socdist_2_lag, data = df_us_socdist_scaled)
##
## n= 2366, number of events= 2365
##
## coef exp(coef) se(coef) z Pr(>|z|)
## airport_dist -0.12768 0.88013 0.02573 -4.962 6.98e-07 ***
## conservative 0.09538 1.10007 0.02666 3.577 0.000347 ***
## male -0.05323 0.94816 0.02335 -2.279 0.022655 *
## age -0.01756 0.98259 0.02205 -0.797 0.425615
## popdens -0.07439 0.92831 0.02794 -2.663 0.007750 **
## manufact 0.04968 1.05093 0.02457 2.022 0.043180 *
## tourism -0.11767 0.88899 0.02528 -4.655 3.24e-06 ***
## academics 0.02042 1.02063 0.04352 0.469 0.639026
## medinc -0.07733 0.92558 0.03477 -2.224 0.026136 *
## healthcare 0.03931 1.04009 0.02802 1.403 0.160717
## temp -0.29100 0.74752 0.02704 -10.761 < 2e-16 ***
## onset_prev -0.03523 0.96539 0.03418 -1.031 0.302745
## slope_prev -0.11930 0.88754 0.03333 -3.580 0.000344 ***
## cpt_day_socdist_2_lag -0.23676 0.78918 0.04751 -4.983 6.25e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## airport_dist 0.8801 1.1362 0.8368 0.9257
## conservative 1.1001 0.9090 1.0441 1.1591
## male 0.9482 1.0547 0.9057 0.9926
## age 0.9826 1.0177 0.9410 1.0260
## popdens 0.9283 1.0772 0.8789 0.9806
## manufact 1.0509 0.9515 1.0015 1.1028
## tourism 0.8890 1.1249 0.8460 0.9341
## academics 1.0206 0.9798 0.9372 1.1115
## medinc 0.9256 1.0804 0.8646 0.9909
## healthcare 1.0401 0.9615 0.9845 1.0988
## temp 0.7475 1.3378 0.7089 0.7882
## onset_prev 0.9654 1.0359 0.9028 1.0323
## slope_prev 0.8875 1.1267 0.8314 0.9474
## cpt_day_socdist_2_lag 0.7892 1.2671 0.7190 0.8662
##
## Concordance= 0.661 (se = 0.007 )
## Likelihood ratio test= 344.5 on 14 df, p=<2e-16
## Wald test = 320.8 on 14 df, p=<2e-16
## Score (logrank) test = 325.7 on 14 df, p=<2e-16
lm_mean_socdist <- spec_calculate(df = df_us_socdist_scaled,
y = "mean_diff_socdist_2",
model = "lm",
controls = covariates %>%
append(c('mean_diff_socdist_2_lag', 'onset_prev', 'slope_prev')),
all.comb = T)
lm_mean_socdist <- lm_mean_socdist %>% map(filter_socdist)
calc_all_summaries(lm_mean_socdist)
## $pers_o
## estimate std.error statistic p.value conf.low conf.high
## mean 0.14021047 0.0194929117 7.205979 8.705399e-07 0.10198542 0.17843551
## sd 0.03452777 0.0006951486 1.812431 7.476719e-06 0.03461717 0.03449206
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.35328573 0.35083507 0.80530040 146.96585 1.503377e-124
## sd 0.04184288 0.04168147 0.02561953 24.32935 4.585680e-123
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 9.000000 -2838.6814 5699.363 5762.8213 1529.47925 2356.000000
## sd 1.732262 75.1761 148.104 141.8935 98.95842 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 1 1 0 1
## sd 0 0 0 0 0
## significant_negative
## mean 0
## sd 0
##
## $pers_c
## estimate std.error statistic p.value conf.low conf.high
## mean -0.09138612 0.0175028708 -5.202841 0.0001219264 -0.12570875 -0.05706349
## sd 0.02382600 0.0005762093 1.266658 0.0005332005 0.02445777 0.02323204
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.3459287 0.34345301 0.80980432 141.97404 1.794927e-99
## sd 0.0455359 0.04536277 0.02764305 22.82046 5.855583e-98
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 9.000000 -2851.70314 5725.4063 5788.8648 1546.8787 2356.000000
## sd 1.732262 80.44351 158.4916 151.8134 107.6924 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 1 0 1 0
## sd 0 0 0 0 0
## significant_negative
## mean 1
## sd 0
##
## $pers_e
## estimate std.error statistic p.value conf.low conf.high
## mean -0.005150281 0.0170325770 -0.3254585 0.3943169 -0.03855068 0.02825012
## sd 0.018590856 0.0005492686 1.0726356 0.2595924 0.01788979 0.01932655
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.33818294 0.33567872 0.81453789 137.17003 1.467243e-86
## sd 0.04833435 0.04817127 0.02914621 23.33049 5.538707e-85
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 9.000000 -2865.36119 5752.7224 5816.1809 1565.1974 2356.000000
## sd 1.732262 84.18746 166.0151 159.4343 114.3107 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.04589844 0.2880859 0.7119141 0.03637695
## sd 0 0.20929038 0.4529266 0.4529266 0.18724911
## significant_negative
## mean 0.009521484
## sd 0.097124295
##
## $pers_a
## estimate std.error statistic p.value conf.low conf.high
## mean -0.07066438 0.0179947707 -3.905691 0.02573413 -0.10595161 -0.03537715
## sd 0.03237484 0.0005958765 1.737321 0.07033431 0.03290196 0.03188185
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.34307315 0.34058648 0.81157397 140.18752 5.652261e-98
## sd 0.04555493 0.04538166 0.02759323 22.68894 2.193977e-96
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 9.000000 -2856.87873 5735.7575 5799.2160 1553.6320 2356.000000
## sd 1.732262 80.11993 157.8544 151.2078 107.7374 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.8581543 0 1 0
## sd 0 0.3489344 0 0 0
## significant_negative
## mean 0.8581543
## sd 0.3489344
##
## $pers_n
## estimate std.error statistic p.value conf.low conf.high
## mean 0.05788521 0.0182294883 3.197587 0.0937796 0.02213770 0.09363271
## sd 0.03644144 0.0005360911 2.007429 0.2089676 0.03681758 0.03609200
## fit_r.squared fit_adj.r.squared fit_sigma fit_statistic fit_p.value
## mean 0.3416623 0.33917212 0.81234918 139.36614 2.498468e-86
## sd 0.0500918 0.04993868 0.03028064 24.26272 9.696779e-85
## fit_df fit_logLik fit_AIC fit_BIC fit_deviance fit_df.residual
## mean 9.000000 -2858.86918 5739.7384 5803.1969 1556.9687 2356.000000
## sd 1.732262 87.63406 172.9206 166.3678 118.4671 1.732262
## fit_nobs significant positive negative significant_positive
## mean 2366 0.7509766 0.8999023 0.1000977 0.7460938
## sd 0 0.4325002 0.3001668 0.3001668 0.4352977
## significant_negative
## mean 0.004882812
## sd 0.069714828
plot_all_curves(lm_mean_socdist, 'lm_mean_socdist')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
lm_mean_socdist_ctrl <- lm(mean_diff_socdist_2 ~ airport_dist +
conservative + male + age + popdens + manufact +
tourism + academics + medinc + healthcare +
temp + onset_prev + slope_prev +
mean_diff_socdist_2_lag,
data = df_us_socdist_scaled)
summary(lm_mean_socdist_ctrl)
##
## Call:
## lm(formula = mean_diff_socdist_2 ~ airport_dist + conservative +
## male + age + popdens + manufact + tourism + academics + medinc +
## healthcare + temp + onset_prev + slope_prev + mean_diff_socdist_2_lag,
## data = df_us_socdist_scaled)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.6309 -0.2614 0.0611 0.3676 2.6202
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0023280 0.0157422 0.148 0.88245
## airport_dist -0.0964326 0.0186435 -5.172 2.51e-07 ***
## conservative -0.1728838 0.0201691 -8.572 < 2e-16 ***
## male -0.0058333 0.0175253 -0.333 0.73928
## age 0.0833188 0.0171674 4.853 1.29e-06 ***
## popdens 0.1215938 0.0184869 6.577 5.88e-11 ***
## manufact -0.0236647 0.0195175 -1.212 0.22545
## tourism 0.0744721 0.0184003 4.047 5.35e-05 ***
## academics 0.0745873 0.0341493 2.184 0.02905 *
## medinc 0.2866240 0.0272018 10.537 < 2e-16 ***
## healthcare 0.0008116 0.0210337 0.039 0.96922
## temp 0.0572502 0.0208304 2.748 0.00603 **
## onset_prev -0.0587622 0.0252012 -2.332 0.01980 *
## slope_prev 0.0714963 0.0237801 3.007 0.00267 **
## mean_diff_socdist_2_lag 0.3108043 0.0273882 11.348 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7657 on 2351 degrees of freedom
## Multiple R-squared: 0.4172, Adjusted R-squared: 0.4138
## F-statistic: 120.2 on 14 and 2351 DF, p-value: < 2.2e-16
results_us <- list(cox_onset_prev_hr, lm_slope_prev, lm_slope_prev_var,
lm_cases_0415, lm_cases_0930, lm_deaths_0415, lm_deaths_0930,
cox_onset_socdist_hr, lm_mean_socdist)
names(results_us) <- list('cox_onset_prev_hr', 'lm_slope_prev', 'lm_slope_prev_var',
'lm_cases_0415', 'lm_cases_0930', 'lm_deaths_0415', 'lm_deaths_0930',
'cox_onset_socdist_hr', 'lm_mean_socdist')
save(results_us, file="US_spec_results.RData")
death_temp <- df_us_death_unscaled %>% select(county_fips:death_rate_0930)
onset_temp <- df_us_prev_unscaled %>% select(county_fips, onset_prev, slope_prev)
df_us_desc <- df_us_socdist_unscaled %>%
plyr::join(death_temp, by="county_fips") %>%
dplyr::select(pers_o, pers_c, pers_e, pers_a, pers_n,
age, male, conservative,
academics, medinc, manufact,
airport_dist, tourism, healthcare, popdens,
temp, onset_prev, slope_prev,
case_rate_0415, case_rate_0930, death_rate_0415, death_rate_0930,
cpt_day_socdist_2, mean_diff_socdist_2)
df_us_desc %>% select(age) %>% summary()
## age
## Min. :22.90
## 1st Qu.:37.70
## Median :40.70
## Mean :40.59
## 3rd Qu.:43.40
## Max. :67.00
us_means <- df_us_desc %>% summarise_all(mean)
us_sd <- df_us_desc %>% summarise_all(sd)
desc_us <- rbind(us_means, us_sd) %>% t() %>% round(3) %>% as.data.frame()
names(desc_us) <- c('mean', 'sd')
desc_us %>% rownames_to_column() %>% write_csv('us_descriptives.csv')
desc_us
a <- df_us_socdist_unscaled %>% select(county_fips, pers_o:pers_n, cpt_day_socdist, mean_diff_socdist)
b <- df_us_prev_unscaled %>% select(county_fips, onset_prev, slope_prev)
c <- df_us_death_unscaled %>% select(county_fips, case_rate_0415:death_rate_0930)
us_joined <- plyr::join_all(list(a, b, c), by='county_fips')
us_joined %>% select(-county_fips) %>% cor(use = 'pairwise.complete')
## pers_o pers_a pers_e pers_c pers_n
## pers_o 1.00000000 -0.15281714 -0.08497946 -0.04802258 -0.22873070
## pers_a -0.15281714 1.00000000 0.24480399 0.65337156 -0.39348271
## pers_e -0.08497946 0.24480399 1.00000000 0.15985220 -0.39790623
## pers_c -0.04802258 0.65337156 0.15985220 1.00000000 -0.40801964
## pers_n -0.22873070 -0.39348271 -0.39790623 -0.40801964 1.00000000
## cpt_day_socdist 0.13992321 0.08453292 0.02403108 0.09085251 -0.12512306
## mean_diff_socdist 0.33391055 -0.23412700 -0.01723667 -0.21269607 0.06173468
## onset_prev -0.29216003 -0.09682226 -0.07478377 -0.09611230 0.26005706
## slope_prev 0.19638054 0.12336567 0.06290779 0.09638924 -0.18203280
## case_rate_0415 0.14548085 0.07765531 0.05022219 0.04619685 -0.12545916
## death_rate_0415 0.08588663 0.09385457 0.02045067 0.05738798 -0.06854497
## case_rate_0930 -0.09448026 0.32267793 0.07255061 0.23696948 -0.20943857
## death_rate_0930 -0.01762812 0.31188291 0.02183955 0.22741935 -0.12688753
## cpt_day_socdist mean_diff_socdist onset_prev slope_prev
## pers_o 0.13992321 0.33391055 -0.29216003 0.19638054
## pers_a 0.08453292 -0.23412700 -0.09682226 0.12336567
## pers_e 0.02403108 -0.01723667 -0.07478377 0.06290779
## pers_c 0.09085251 -0.21269607 -0.09611230 0.09638924
## pers_n -0.12512306 0.06173468 0.26005706 -0.18203280
## cpt_day_socdist 1.00000000 0.15266953 -0.14248381 0.19833550
## mean_diff_socdist 0.15266953 1.00000000 -0.26415088 0.27105720
## onset_prev -0.14248381 -0.26415088 1.00000000 -0.72372322
## slope_prev 0.19833550 0.27105720 -0.72372322 1.00000000
## case_rate_0415 0.15305730 0.24381339 -0.47200894 0.86665276
## death_rate_0415 0.12568626 0.19568034 -0.39929052 0.75658883
## case_rate_0930 0.14915336 -0.40302394 -0.14180907 0.22824887
## death_rate_0930 0.20710211 -0.09734626 -0.27900660 0.51420879
## case_rate_0415 death_rate_0415 case_rate_0930 death_rate_0930
## pers_o 0.14548085 0.08588663 -0.09448026 -0.01762812
## pers_a 0.07765531 0.09385457 0.32267793 0.31188291
## pers_e 0.05022219 0.02045067 0.07255061 0.02183955
## pers_c 0.04619685 0.05738798 0.23696948 0.22741935
## pers_n -0.12545916 -0.06854497 -0.20943857 -0.12688753
## cpt_day_socdist 0.15305730 0.12568626 0.14915336 0.20710211
## mean_diff_socdist 0.24381339 0.19568034 -0.40302394 -0.09734626
## onset_prev -0.47200894 -0.39929052 -0.14180907 -0.27900660
## slope_prev 0.86665276 0.75658883 0.22824887 0.51420879
## case_rate_0415 1.00000000 0.80803632 0.19781085 0.46226490
## death_rate_0415 0.80803632 1.00000000 0.14786594 0.48273071
## case_rate_0930 0.19781085 0.14786594 1.00000000 0.59222976
## death_rate_0930 0.46226490 0.48273071 0.59222976 1.00000000
us_means <- us_joined %>% select(-county_fips) %>% summarise_all(mean)
us_sd <- us_joined %>% select(-county_fips) %>% summarise_all(sd)
desc_us <- rbind(us_means, us_sd) %>% t() %>% round(3) %>% as.data.frame() %>%
rename(mean=V1, sd=V2)
desc_us %>% rownames_to_column() %>% write_csv('us_descriptives.csv')
desc_us
# get county names
counties <- read_csv('US_personality_2.csv') %>%
select(county, county_name) %>%
dplyr::rename(county_fips = county) %>%
mutate(county_fips = as.character(county_fips)) %>%
distinct()
df_us_prev <- read_csv('US_prevalence.csv')
df_us_prev <- df_us_prev %>%
select(fips, date, rate) %>%
mutate(date = as.Date(date, "%d%b%Y")) %>%
dplyr::rename(county_fips = fips,
rate_day = rate) %>%
mutate(county_fips = as.character(county_fips)) %>%
filter(rate_day >= 0 & rate_day <= 1000)
# merge with processed data frames
df_us_prev <- df_us_prev %>% inner_join(counties, by = 'county_fips')
df_us_socdist <- df_us_socdist %>% inner_join(counties, by = 'county_fips')
gg_prev <- df_us_prev %>%
filter(county_fips == '8037' | county_fips == '19079') %>%
filter(date >= '2020-03-1' & date <= '2020-05-01') %>%
mutate(window = ifelse(date >= '2020-03-15' & date <= '2020-04-15',
'in window', 'out of window')) %>%
ggplot(aes(x=date, y=rate_day)) +
geom_point(aes(col=county_name)) +
geom_line(aes(col=county_name)) +
theme_light() +
theme(legend.position="bottom", legend.margin=margin(t=-20)) +
guides(color=guide_legend(title="")) +
annotate('rect', xmin = as.Date('2020-03-15'),
xmax = as.Date('2020-04-15'),
ymin = -Inf, ymax = Inf, alpha = 0.07) +
geom_segment(aes(x = as.Date('2020-04-07'), y = 0,
xend = as.Date('2020-04-07'), yend = 1),
colour = "#00BFC4") +
geom_segment(aes(x = as.Date('2020-03-09'), y = 0,
xend = as.Date('2020-03-09'), yend = 1),
colour = "#F8766D") +
annotate("text", x = as.Date('2020-04-07'),
y = 2, label = "Onset \n Hamilton County", color = "#00BFC4", size = 3) +
annotate("text", x = as.Date('2020-03-09'),
y = 2, label = "Onset \n Eagle County", color = "#F8766D", size = 3) +
annotate("text", x = as.Date('2020-03-31'),
y = 9.5, label = "Time window used for\nanalysis of growth rates",
color = "grey50", size = 3) +
ylab('Prevalence') +
xlab('') +
ylim(c(-1,10)) +
ggtitle("COVID-19 - comparison of onsets and \n growth rates in two counties") +
theme(plot.title = element_text(size=11)) +
theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank())
gg_prev
gg_socdist <- df_us_socdist %>%
filter(county_fips == 19137 | county_fips == 36081) %>%
filter(date >= '2020-03-1' & date <= '2020-05-01') %>%
ggplot(aes(x=date, y=socdist_tiles)) +
#facet_wrap(~countyname) +
geom_point(aes(col=county_name)) +
geom_line(aes(col=county_name)) +
theme_light() +
theme(legend.position="bottom",legend.margin=margin(t=-20)) +
guides(color=guide_legend(title="")) +
geom_segment(aes(x = as.Date('2020-03-23'), y = -.6,
xend = as.Date('2020-03-23'), yend = 0),
colour = "#00BFC4") +
geom_segment(aes(x = as.Date('2020-03-23'), y = -.6,
xend = as.Date('2020-04-28'), yend = -.6),
colour = "#00BFC4") +
geom_segment(aes(x = as.Date('2020-03-14'), y = -.156,
xend = as.Date('2020-03-14'), yend = .17),
colour = "#F8766D") +
geom_segment(aes(x = as.Date('2020-03-14'), y = -.156,
xend = as.Date('2020-04-28'), yend = -.156),
colour = "#F8766D") +
annotate("text", x = as.Date('2020-03-26'),
y = .08, label = "Change point\nQueens County", color = "#00BFC4", size = 3) +
annotate("text", x = as.Date('2020-03-16'),
y = .25, label = "Change point\nMontgommery County", color = "#F8766D", size = 3) +
ylab('Social distancing') +
xlab("") +
ylim(c(-.7,.3)) +
ggtitle("Social distancing - comparison of change \n points and means in two counties") +
theme(plot.title = element_text(size=11)) +
theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank())
gg_socdist
figure <- ggarrange(gg_prev, gg_socdist,
labels = c("A", "B"),
ncol = 2, nrow = 1)
figure
# create sequence of dates
date_sequence <- seq.Date(min(df_us_prev$date),
max(df_us_prev$date), 1)
# create data frame with time sequence
df_dates = data.frame(date_sequence, 1:length(date_sequence))
names(df_dates) <- c('date', 'time')
tto <- df_us_prev_unscaled %>%
merge(df_dates, by.x = 'onset_prev', by.y = 'time') %>%
filter(event==1) %>%
ggplot(aes(x=date+1)) +
geom_histogram(bins=15) +
theme_light() +
ggtitle('Distribution - COVID-19 Onset') +
theme(plot.title = element_text(size=11)) +
ylab('Absolute frequency') +
xlab('Date of March')
soa <- df_us_slope_prev %>%
ggplot(aes(x=slope_prev)) +
geom_histogram(bins=15) +
theme_light() +
ggtitle('Distribution - COVID-19 Growth Rates') +
theme(plot.title = element_text(size=11)) +
ylab('Absolute frequency') +
xlab('Standardized growth rates')
figure <- ggarrange(tto, soa,
labels = c("", ""),
ncol = 2, nrow = 1)
figure
tto <- df_us_death_unscaled %>%
ggplot(aes(x=case_rate_0930)) +
geom_histogram(bins=15) +
theme_light() +
ggtitle('Distribution - Cumulative Case Rates') +
theme(plot.title = element_text(size=11)) +
ylab('Absolute frequency') +
xlab('Cases Per 1000 Inhabitants')
soa <- df_us_death_unscaled %>%
ggplot(aes(x=death_rate_0930)) +
geom_histogram(bins=15) +
theme_light() +
ggtitle('Distribution - Cumulative Death Rates') +
theme(plot.title = element_text(size=11)) +
ylab('Absolute frequency') +
xlab('Deaths Per 1000 Inhabitants')
figure <- ggarrange(tto, soa,
labels = c("", ""),
ncol = 2, nrow = 1)
figure
# create sequence of dates
date_sequence <- seq.Date(min(df_us_socdist$date),
max(df_us_socdist$date), 1)
# create data frame with time sequence
df_dates = data.frame(date_sequence, 1:length(date_sequence))
names(df_dates) <- c('date', 'time')
tto <- df_us_cpt_socdist %>%
merge(df_dates, by.x = 'cpt_day_socdist_2', by.y = 'time') %>%
filter(cpt_day_socdist_2 < 30) %>%
ggplot(aes(x=date+1)) +
geom_histogram(bins=15) +
theme_light() +
ggtitle('Distribution - Time to Adoption') +
theme(plot.title = element_text(size=11)) +
ylab('Absolute frequency') +
xlab('Date') +
xlim(c(as.Date('2020-03-01'), as.Date('2020-03-31')))
soa <- df_us_cpt_socdist %>%
ggplot(aes(x=mean_diff_socdist_2)) +
geom_histogram(bins=15) +
theme_light() +
ggtitle('Distribution - Strength of Adjustment') +
theme(plot.title = element_text(size=11)) +
ylab('Absolute frequency') +
xlab('Standardized mean difference')
figure <- ggarrange(tto, soa,
labels = c("", ""),
ncol = 2, nrow = 1)
figure
df_us_google <- read_csv("../Google/2020_US_Region_Mobility_Report.csv")
df_us_fb <- df_us_socdist %>%
select(county_fips, date, socdist_tiles, socdist_single_tile)
df_us_ggl <- df_us_google %>%
select(census_fips_code, date,
retail_and_recreation_percent_change_from_baseline:residential_percent_change_from_baseline) %>%
drop_na() %>%
mutate(county_fips = as.character(as.numeric(census_fips_code))) %>%
select(-census_fips_code)
df_us_fb_ggl <- df_us_fb %>% plyr::join(df_us_ggl, c('county_fips', 'date')) %>% drop_na()
df_us_fb_ggl %>% split(.$county_fips) %>%
map(~ map_if(., is.numeric, scale)) %>%
bind_rows() %>%
select(socdist_tiles:residential_percent_change_from_baseline) %>%
drop_na() %>%
cor() %>%
as.data.frame() %>%
select(c(1,2)) %>% round(3) %>% write.csv()
## "","socdist_tiles","socdist_single_tile"
## "socdist_tiles",1,-0.971
## "socdist_single_tile",-0.971,1
## "retail_and_recreation_percent_change_from_baseline",0.945,-0.939
## "grocery_and_pharmacy_percent_change_from_baseline",0.766,-0.795
## "parks_percent_change_from_baseline",0.44,-0.442
## "transit_stations_percent_change_from_baseline",0.889,-0.888
## "workplaces_percent_change_from_baseline",0.919,-0.925
## "residential_percent_change_from_baseline",-0.906,0.897
load("US_spec_results.RData")
load("../GER/GER_spec_results.RData")
plot_dist <- function(df, filename){
w <- 5
h <- 5
file_path_eps <- paste0("../../Plots/COMP/", filename,".eps")
file_path_pdf <- paste0("../../Plots/COMP/", filename,".pdf")
file_path_png <- paste0("../../Plots/COMP/", filename,".png")
ggplot(df, aes(x=estimate, fill=country), alpha=.3) +
geom_histogram(alpha=.5, position="identity") +
#coord_fixed(ratio = 0.1) +
theme_bw() +
theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black")) +
theme(axis.title.x=element_blank(),
axis.text.y=element_blank(),
axis.title.y=element_blank()) +
theme(legend.position="none") +
ggsave(file=file_path_eps, device = 'eps', width=w, height=h)+
ggsave(file=file_path_pdf, device = 'pdf', width=w, height=h)+
ggsave(file=file_path_png, device = 'png', width=w, height=h)
}
# define function to plot all curves in list
plot_all_dist <- function(ls, analysis, hr=F){
pers <- c('o', 'c', 'e', 'a', 'n')
filenames <- as.list(paste0(analysis, '_', pers))
pmap(list(ls, filenames), plot_dist)
}
join_results <- function(list_us, list_ger){
US = "US"
GER = "GER"
list_us <- list_us %>%
map(~cbind(.x, US) %>% rename(country=US))
list_ger <- list_ger %>%
map(~cbind(.x, GER) %>% rename(country=GER))
map2(list_us, list_ger, rbind)
}
join_results(results_us$cox_onset_prev_hr, results_ger$cox_onset_prev_hr) %>%
plot_all_dist(analysis = 'cox_onset_prev_hr')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
join_results(results_us$lm_slope_prev, results_ger$lm_slope_prev) %>%
plot_all_dist(analysis = 'lm_slope_prev')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
join_results(results_us$lm_slope_prev_var, results_ger$lm_slope_prev_var) %>%
plot_all_dist(analysis = 'lm_slope_prev_var')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
join_results(results_us$lm_cases_0415, results_ger$lm_cases_0331) %>%
plot_all_dist(analysis = 'lm_cases_0415')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
join_results(results_us$lm_cases_0930, results_ger$lm_cases_0930) %>%
plot_all_dist(analysis = 'lm_cases_0930')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
join_results(results_us$lm_deaths_0415, results_ger$lm_deaths_0331) %>%
plot_all_dist(analysis = 'lm_deaths_0415')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
join_results(results_us$lm_deaths_0930, results_ger$lm_deaths_0930) %>%
plot_all_dist(analysis = 'lm_deaths_0930')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
join_results(results_us$cox_onset_socdist_hr, results_ger$cox_onset_socdist_hr) %>%
plot_all_dist(analysis = 'cox_onset_socdist_hr')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
join_results(results_us$lm_mean_socdist, results_ger$lm_mean_socdist) %>%
plot_all_dist(analysis = 'lm_mean_socdist')
## $spec_results_o
##
## $spec_results_c
##
## $spec_results_e
##
## $spec_results_a
##
## $spec_results_n
calculate_effsize <- function(df){
df %>% summarise(d = effsize::cohen.d(estimate ~ country)$estimate,
pval = t.test(estimate ~ country, var.equal = TRUE)$p.value)
}
all_ttest <- map2(results_us, results_ger, join_results) %>% flatten() %>%
map(calculate_effsize) %>% bind_rows(.id = "model")
models <- rep(names(results_us), each=5)
all_ttest <- cbind(models, all_ttest)
all_ttest
save(all_ttest, file='ttest_results.RData')
Social Distancing - Change point analysis